# coding=utf-8
import urllib2
import re


# Site that is searched, and how many result pages are tried before giving up.
_YL_DOMAIN = 'https://www.youlai.cn'
_MAX_SEARCH_PAGES = 10

# First anchor whose href contains "ask/". The captured href is confined to a
# single quoted attribute value so it cannot swallow neighbouring attributes.
_ANSWER_LINK_RE = re.compile(r'<a\b[^>]*?href="([^"]*ask/[^"]*)"', re.I)

# Answer containers, tried in order. Non-greedy groups stop at the first
# closing tag; the previous character classes ([^div]*, [^</p>]*) rejected
# any answer that merely contained one of those letters/symbols.
_ANSWER_BODY_RES = (
    re.compile(r'<div class="text">(.*?)</div>', re.I | re.DOTALL),
    re.compile(r'<div class="p_text">(.*?)</div>', re.I | re.DOTALL),
    re.compile(r'<div class="docYes">.*?<p>(.*?)</p>', re.I | re.DOTALL),
)

# Any remaining markup inside the extracted answer.
_TAG_RE = re.compile(r'<[^>]+>')


def find_words_internet(key_word, fetch=None):
    """Search youlai.cn for ``key_word`` and return the first answer's text.

    Up to 10 search-result pages are requested in turn. On each page the
    first link whose href contains ``ask/`` is followed, and the answer text
    is pulled from the first of these containers found on that page:
    ``<div class="text">``, ``<div class="p_text">``, or the first ``<p>``
    after ``<div class="docYes">``. HTML tags are removed from the result and
    surrounding whitespace is stripped.

    Args:
        key_word: The search phrase. It is percent-encoded before being put
            into the query string; text (non-bytes) input is UTF-8 encoded
            first.
        fetch: Optional callable ``fetch(url) -> page source``. When omitted,
            pages are downloaded with ``urllib2.urlopen`` and the response is
            closed after reading. The returned value must be a string type
            the ``str`` regex patterns above can search.

    Returns:
        The extracted answer text, or ``""`` when no page yielded an answer.

    Raises:
        Whatever the fetcher raises (errors from ``urllib2.urlopen`` are not
        caught here).
    """
    # Function-scope imports keep the block self-contained; the fallback
    # covers interpreters where these helpers live in ``urllib.parse``.
    try:
        from urllib import quote
        from urlparse import urljoin
    except ImportError:
        from urllib.parse import quote, urljoin

    if fetch is None:
        def fetch(url):
            response = urllib2.urlopen(url)
            try:
                return response.read()
            finally:
                # Release the connection even if reading fails.
                response.close()

    # Percent-encode the keyword so spaces, '&', '#', and non-ASCII bytes
    # cannot corrupt the query string.
    raw_query = key_word if isinstance(key_word, bytes) else key_word.encode('utf-8')
    query = quote(raw_query, safe='')

    for page in range(1, _MAX_SEARCH_PAGES + 1):
        print('思考中:' + str(page))
        search_html = fetch(
            _YL_DOMAIN + "/cse/search?q=" + query + "&page=" + str(page))

        # Look for an answer link in the result list.
        link = _ANSWER_LINK_RE.search(search_html)
        if link is None:
            continue

        # Fetch the detail page of the first answer. urljoin copes with
        # relative, slash-less, and absolute hrefs alike.
        detail_html = fetch(urljoin(_YL_DOMAIN, link.group(1)))
        for pattern in _ANSWER_BODY_RES:
            body = pattern.search(detail_html)
            if body is not None:
                return _TAG_RE.sub('', body.group(1)).strip()
        # No known answer container on this detail page: try the next
        # search-result page.
    return ""


if __name__ == "__main__":
    # Manual smoke run: echo the question, then look up and print its answer.
    # The question is printed before the lookup so the progress lines emitted
    # by find_words_internet appear between the two.
    keyword = ""
    print("问:" + keyword)
    answer = find_words_internet(keyword)
    print("答:" + answer)