1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
| import urllib2 import re
# Containers that may hold the answer on a detail page, tried in this order.
# The captures are non-greedy ``.*?``: a character class such as ``[^div]``
# only excludes the single letters d/i/v, so it rejected any answer that
# contained one of them.
_ANSWER_PATTERNS = (
    re.compile(r'<div class="text">(.*?)</div>', re.I | re.S),
    re.compile(r'<div class="p_text">(.*?)</div>', re.I | re.S),
    # Non-greedy prefix: take the first <p> after the container, not the
    # last <p> of the whole document.
    re.compile(r'<div class="docYes">.*?<p>(.*?)</p>', re.I | re.S),
)

# First anchor whose href value contains "ask/". ``[^"]*`` keeps the capture
# inside the href attribute instead of running across later attributes.
_ASK_LINK_PATTERN = re.compile(r'<a\b[^>]*?\bhref="([^"]*ask/[^"]*)"', re.I)

_TAG_PATTERN = re.compile(r'<[^>]+>')


def _as_text(data):
    """Return *data* unchanged unless it is a Python 3 ``bytes`` object."""
    if isinstance(data, bytes) and not isinstance(data, str):
        # NOTE(review): assumes the site serves UTF-8 -- confirm.
        return data.decode('utf-8', 'replace')
    return data


def _fetch_page(url):
    """Download *url* and return the raw response body."""
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2

    # closing() releases the connection even if read() raises; it also works
    # on Python 2, where the response object is not a context manager.
    with closing(urlopen(url)) as response:
        return response.read()


def _extract_answer(html):
    """Return the tag-free, stripped answer text in *html*, or None."""
    for pattern in _ANSWER_PATTERNS:
        found = pattern.search(html)
        if found is not None:
            return _TAG_PATTERN.sub('', found.group(1)).strip()
    return None


def find_words_internet(key_word, max_pages=10, fetch=None):
    """Look up *key_word* on www.youlai.cn and return the first answer text.

    Search result pages 1..max_pages are requested in order. On each page
    the first link whose href contains ``ask/`` is followed, and the answer
    is taken from the first of these containers present on that page:
    ``<div class="text">``, ``<div class="p_text">``, or the first ``<p>``
    after ``<div class="docYes">``. HTML tags are removed from the result.

    Args:
        key_word: Search phrase; it is percent-encoded before being placed
            in the query string.
        max_pages: Maximum number of search result pages to try.
        fetch: Optional callable ``fetch(url)`` returning the page body.
            Defaults to an HTTP GET through urllib.

    Returns:
        The stripped answer text, or ``""`` when no page yields an answer.

    Raises:
        Errors raised by *fetch* (for the default fetcher, urllib's network
        errors) are not caught and propagate to the caller.
    """
    try:
        from urllib.parse import quote, urljoin  # Python 3
    except ImportError:
        from urllib import quote  # Python 2
        from urlparse import urljoin

    if fetch is None:
        fetch = _fetch_page

    yl_domain = 'https://www.youlai.cn'
    # Encode once: spaces, '&', '#' or non-ASCII text would otherwise
    # corrupt the query string.
    query = quote(key_word, safe='')

    for page in range(1, max_pages + 1):
        print('思考中:' + str(page))
        search_url = yl_domain + "/cse/search?q=" + query + "&page=" + str(page)
        link = _ASK_LINK_PATTERN.search(_as_text(fetch(search_url)))
        if link is None:
            continue
        # urljoin copes with both relative and absolute hrefs.
        detail_html = _as_text(fetch(urljoin(yl_domain, link.group(1))))
        answer = _extract_answer(detail_html)
        if answer is not None:
            return answer
        # No recognised container on this detail page: try the next page.
    return ""
if __name__ == "__main__":
    # Demo entry point: echo the question, then look up and print its answer.
    # The keyword is an empty placeholder in this script.
    keyword = ""
    question_line = "问:" + keyword
    print(question_line)
    answer_line = "答:" + find_words_internet(keyword)
    print(answer_line)
|