zhouhaihua00 2019-12-15
有道词典的 web 接口可以用爬虫来模拟:输入关键词 key,将其拼接进有道词典接口所需的 form data,然后抓取返回值(即由 Ajax 动态生成的翻译结果 translation)。从外部看,这样就实现了对翻译接口的模拟,相当于爬虫模拟浏览器调用了有道词典的 web 接口。说实话,直接调用有道的 web 接口、传入 json 参数就可以了,不必这么费事;但爬虫完整地模拟了人登录网页、输入关键词、获得翻译结果的过程。
先在浏览器中进行一次输入操作,据此解析出有道词典翻译 web 接口的 url 和请求格式。


# Crawler that imitates a browser calling the Youdao Dictionary web interface.
import json
import re
from urllib import parse
from urllib import request
class YoudaoTranslator:
    """Translate text by imitating the form POST a browser sends to Youdao.

    The text to translate is stored in ``key``. Calling ``Translator()``
    posts it to the Youdao web endpoint, prints the translation and returns
    the translated segments as a list of strings.
    """

    # Endpoint and browser-like User-Agent sent with every request.
    _URL = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"
    _HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
    }
    # Seconds to wait for the server, so a stalled connection cannot hang
    # the caller forever.
    _TIMEOUT = 10

    def __init__(self, key):
        """Remember *key*, the text that will be sent for translation."""
        self.key = key

    def __getData(self):
        """Return the url-encoded, UTF-8 encoded form body for the request."""
        formdata = {
            "i": self.key,
            "from": "AUTO",
            "to": "AUTO",
            "smartresult": "dict",
            "client": "fanyideskweb",
            # salt / sign / ts / bv are fixed values, not computed from `key`.
            # NOTE(review): presumably captured from one browser session and
            # not validated by this endpoint -- confirm.
            "salt": "15763837022114",
            "sign": "2b12fd214e066f53bc3455a126d7a509",
            "ts": "1576383702211",
            "bv": "5575008ba9785f184b106838a72d6536",
            "doctype": "json",
            "version": "2.1",
            "keyfrom": "fanyi.web",
            # Spelled with a lowercase "l" in place of the second "I"; kept
            # byte-for-byte. NOTE(review): presumably this is what the web
            # client really sends -- confirm before "correcting" it.
            "action": "FY_BY_REALTlME",
        }
        return parse.urlencode(formdata).encode(encoding="utf-8")

    def __getPage(self):
        """POST the form data and return the response body as text.

        Raises whatever ``urllib.request.urlopen`` raises on network errors
        or when the timeout expires.
        """
        req = request.Request(
            self._URL, data=self.__getData(), headers=self._HEADERS
        )
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(req, timeout=self._TIMEOUT) as res:
            return res.read().decode("utf-8")

    @staticmethod
    def _extract_translations(page):
        """Return every ``tgt`` string found under ``translateResult``.

        *page* is the response text. It is parsed as JSON rather than
        scanned with a regular expression, so escaped characters are decoded
        and inputs that produce several segments yield one entry each.
        An empty list is returned when *page* is not JSON or does not have
        the expected list-of-lists-of-objects layout.
        """
        try:
            payload = json.loads(page)
        except ValueError:
            return []
        if not isinstance(payload, dict):
            return []
        rows = payload.get("translateResult")
        if not isinstance(rows, list):
            return []
        translations = []
        for row in rows:
            if not isinstance(row, list):
                continue
            for segment in row:
                if isinstance(segment, dict) and isinstance(segment.get("tgt"), str):
                    translations.append(segment["tgt"])
        return translations

    def __Pat(self):
        """Fetch the page, print the translation and return its segments.

        Prints one segment per line (a single line for a single-segment
        reply). Nothing is printed when no translation was found; the
        returned list is then empty.
        """
        result = self._extract_translations(self.__getPage())
        if result:
            print("\n".join(result))
        return result

    def Translator(self):
        """Translate ``self.key``: print the result and return the segments."""
        return self.__Pat()
if __name__ == "__main__":
    # Demo run: send a sample Chinese phrase; Translator() performs the
    # request and prints the translation it receives.
    translator = YoudaoTranslator("人格心理学")
    translator.Translator()
    # The source article continued on this line with "然后是运行结果"
    # ("and then comes the run output"); it is prose, not code.
