Python爬虫学习获取腾讯新闻并存入Excel
生活随笔
收集整理的這篇文章主要介紹了
Python爬虫学习获取腾讯新闻并存入Excel
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
import requests
import json
import xlwt
import os

# HTTP headers sent with every request issued by getPage(): a referer
# pointing at the Tencent News front page and a desktop Chrome user agent.
headers = {
    'referer': 'https://news.qq.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
}
rows = 0  # index of the next worksheet row to be written


def getPage(url):
    """Fetch *url* and return its body parsed as JSON.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address of the JSON endpoint to query.

    Returns:
        The decoded JSON object, or ``None`` when the request could not be
        completed or the response body is not valid JSON.
    """
    try:
        # A timeout keeps the script from hanging forever on a dead connection.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as err:
        # No response object exists here, so only the error can be reported.
        print("request to {} failed: {}".format(url, err))
        return None

    # Let requests guess the charset from the body before decoding the text.
    response.encoding = response.apparent_encoding
    try:
        return json.loads(response.text)
    except ValueError:
        # Body was not JSON; the status code is the most useful hint.
        print(response.status_code)
        return None
def parse_news(text):
    """Write the title and URL of every news item in *text* to the sheet.

    Two response shapes are handled: ``text['data']`` may be a dict whose
    ``'list'`` entry holds the items (hot picks), or it may be the list of
    items itself (today's headlines and today's topics).

    Each item is written to the module-level ``sheet`` at row ``rows``
    (title in column 0, URL in column 1) and the module-level ``rows``
    counter is advanced by one per item.

    Args:
        text: Decoded JSON response as returned by ``getPage``. ``None`` or
            a response without usable items is silently ignored.
    """
    global rows  # the row counter is shared with the script section below

    if not isinstance(text, dict):
        return

    items = text.get('data')
    if isinstance(items, dict):
        items = items.get('list')

    for item in items or []:
        if not isinstance(item, dict):
            continue
        sheet.write(rows, 0, item.get('title', ''))
        sheet.write(rows, 1, item.get('url', ''))
        rows += 1


if __name__ == '__main__':
    today_news_url = 'https://i.news.qq.com/trpc.qqnews_web.pc_base_srv.base_http_proxy/NinjaPageContentSync?pull_urls=news_top_2018'
    today_topic_url = 'https://i.news.qq.com/trpc.qqnews_web.pc_base_srv.base_http_proxy/NinjaPageContentSync?pull_urls=today_topic_2018'
    # xlwt writes the legacy binary .xls format, so the file must carry
    # that extension for spreadsheet applications to open it.
    output_name = '09_騰訊新聞.xls'

    # Create the workbook and add a single worksheet to it.
    Excel_book = xlwt.Workbook()
    sheet = Excel_book.add_sheet('騰訊新聞')

    # Today's headlines
    print("開始獲取今日要聞>>>")
    today_news = getPage(today_news_url)
    sheet.write(rows, 0, '以下為今日要聞')
    rows += 1
    parse_news(today_news)
    print("...今日要聞獲取完畢")

    # Today's topics
    print("開始獲取今日話題>>>")
    today_topic = getPage(today_topic_url)
    sheet.write(rows, 0, "以下為今日話題")
    rows += 1
    parse_news(today_topic)
    print("...今日話題獲取完畢")

    # Hot picks: the user chooses how many pages to download.
    page = int(input("請輸入你想獲得的熱點精選頁數:"))
    print("開始獲取熱點精選>>>")
    sheet.write(rows, 0, "以下為熱點精選")
    rows += 1
    for i in range(page):
        # The offset advances by 20 per page to match limit=20. The URL is
        # built by concatenation because it contains literal '%' and '{'.
        hot_news_url = 'https://i.news.qq.com/trpc.qqnews_web.kv_srv.kv_srv_http_proxy/list?sub_srv_id=24hours&srv_id=pc&offset=' + str(i * 20) + '&limit=20&strategy=1&ext={%22pool%22:[%22top%22],%22is_filter%22:7,%22check_type%22:true}'
        hot_news = getPage(hot_news_url)
        parse_news(hot_news)
        print("...第%d頁獲取完畢" % (i + 1))

    Excel_book.save(output_name)
    # os.path.join keeps the reported path correct on every platform.
    print("保存完畢!路徑為{}".format(os.path.join(os.getcwd(), output_name)))
?
總結
以上是生活随笔為你收集整理的Python爬虫学习获取腾讯新闻并存入Excel的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Python操作Json、Csv、Exc
- 下一篇: websocket python爬虫_p