from . import tool
import datetime
import html
import re
def html_only(conn, data, title, main_num):
curs = conn.cursor()
backlink = []
plus_data = ''
while 1:
in_data = re.search('((?:(?!<\/a>).)+)<\/a>', data)
if in_data:
in_data = in_data.groups()
if in_data[0]:
main_link = in_data[0]
sub_link = in_data[1]
else:
main_link = in_data[1]
sub_link = in_data[1]
curs.execute("select title from data where title = ?", [main_link])
if not curs.fetchall():
link_id = 'id="not_thing"'
backlink += [[title, main_link, 'no']]
else:
link_id = 'id=""'
backlink += [[title, main_link, '']]
data = re.sub('((?:(?!<\/a>).)+)<\/a>', '' + sub_link + '', data, 1)
else:
break
data = re.sub(').)+)>((?:(?!<\/a>).)+)<\/a>', data)
if in_data:
in_data = in_data.groups()
a_data = re.sub('href="((?:(?!").)+)"', '', in_data[0])
a_data = re.sub('id="((?:(?!").)+)"', '', a_data)
if re.search('=', a_data):
data = re.sub(').)+)>((?:(?!<\/a>).)+)<\/a>', '', data, 1)
else:
data = re.sub(').)+)>((?:(?!<\/a>).)+)<\/a>', '' + in_data[1] + '', data, 1)
else:
break
data = re.sub('