Python3--批量爬取数据之调用百度api进行翻译
生活随笔
收集整理的這篇文章主要介紹了
Python3--批量爬取数据之调用百度api进行翻译
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
上代碼:
# -*- coding: utf-8 -*-
# ************************************************************
# File purpose: translate English names into Chinese via the
# Baidu Translate API, reading rows from a '|'-delimited CSV and
# routing translated / already-translated rows to output files.
# ************************************************************
import csv
import hashlib
import json
import random
import time
import urllib.parse

import requests
from fake_useragent import UserAgent


class Baidufanyi:
    """Thin client for the Baidu Translate REST API (en -> zh)."""

    def __init__(self, appid, appSecret, ip_list):
        self.url = 'https://fanyi-api.baidu.com/api/trans/vip/translate'
        self.ip_list = ip_list      # pool of proxy addresses, one "host:port" per entry
        self.appid = appid          # Baidu API application ID
        self.appSecret = appSecret  # Baidu API application secret
        self.langFrom = 'en'        # source language
        self.langTo = 'zh'          # target language

    def getUrlEncodeData(self, queryText):
        """Build the url-encoded query string for *queryText*.

        The request signature required by the Baidu API is
        md5(appid + query + salt + secret).  The salt is fixed to '2';
        the API only requires that the sign be computed with the same
        salt that is sent.
        """
        salt = '2'
        sign_str = (self.appid + queryText + salt + self.appSecret).encode('utf-8')
        sign = hashlib.md5(sign_str).hexdigest()
        payload = {
            'q': queryText,
            'from': self.langFrom,
            'to': self.langTo,
            'appid': self.appid,
            'salt': salt,
            'sign': sign,
        }
        # The API is called with GET, so the payload goes into the query
        # string rather than a POST body.
        return urllib.parse.urlencode(payload)

    def parseHtml(self, html):
        """Parse the JSON response *html* and return the translated text."""
        data = json.loads(html)
        print('-------------------------')
        translationResult = data['trans_result'][0]['dst']
        if isinstance(translationResult, list):
            translationResult = translationResult[0]
        print(translationResult)
        return translationResult

    def get_translateResult(self, queryText):
        """Fetch and return the translation of *queryText* through a random proxy."""
        data = self.getUrlEncodeData(queryText)
        target_url = self.url + '?' + data
        print('target_url為:' + target_url)
        # BUG FIX: the original wrote 'str(UserAgent().random)' INSIDE quotes,
        # so the literal source text was sent as the User-Agent header.
        headers = {'User-Agent': str(UserAgent().random)}
        try:
            proxies = get_randomIp(self.ip_list)
            req = requests.get(target_url, proxies=proxies,
                               headers=headers, timeout=10)
        except requests.RequestException:
            # BUG FIX: the message promised a 20 s pause but the original never
            # slept (`time` was imported and unused); also narrow the bare
            # except to network errors and keep a timeout on the retry.
            print('運行錯誤,暫停20秒')
            time.sleep(20)
            proxies = get_randomIp(self.ip_list)
            req = requests.get(target_url, proxies=proxies,
                               headers=headers, timeout=10)
        req.encoding = 'utf-8'
        return self.parseHtml(req.text)


def get_ipList():
    """Read the proxy list from IP.txt, one proxy per line (with newlines)."""
    with open('IP.txt', 'r') as f:
        return f.readlines()


def get_randomIp(ip_list):
    """Pick a random proxy from *ip_list* and wrap it for requests' proxies=."""
    proxy_ip = random.choice(ip_list).strip('\n')
    return {'http': proxy_ip}


def reader_file(filePath):
    """Load the '|'-delimited CSV at *filePath*, skipping empty rows."""
    with open(filePath, 'r', encoding='utf-8') as csvfile:
        spanreader = csv.reader(csvfile, delimiter='|', quoting=csv.QUOTE_MINIMAL)
        return [row for row in spanreader if row]


def write_file(filePath, row):
    """Append a single *row* to the '|'-delimited CSV at *filePath*."""
    with open(filePath, 'a+', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(row)


def main():
    """Translate every row whose 7th column is empty; route rows to output files."""
    print('程序開始運行!')
    # BUG FIX: the original read `appid = appid` / `appSecret = appSecret`,
    # an immediate NameError.  Fill in real credentials before running.
    appid = 'YOUR_APPID'           # Baidu API application ID
    appSecret = 'YOUR_APP_SECRET'  # Baidu API application secret
    filePath = 'baidubaike.csv'    # input file to translate
    ip_list = get_ipList()
    fanyi = Baidufanyi(appid, appSecret, ip_list)
    for row in reader_file(filePath):
        if not row[6]:  # column 6 empty -> not translated yet
            print('現在翻譯的英文名是:' + row[0])
            translateResult = fanyi.get_translateResult(row[0])
            print('翻譯成功后的結果是:' + translateResult)
            row[6] = translateResult
            # NOTE(review): output file names look swapped (translated rows go
            # to *_notChinese.csv) — kept as in the original for compatibility.
            write_file('baidubaike_notChinese.csv', row)
        else:
            write_file('baidubaike_Chinese.csv', row)
    print('信息爬取成功,程序運行結束')


if __name__ == '__main__':
    main()
以上是生活随笔為你收集整理的Python3--批量爬取数据之调用百度api进行翻译的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Python3--批量爬取数据之调用有道
- 下一篇: Python3--批量爬取数据之调金山词