當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

抓取L4d2地图信息

發布時間：2024/3/24 编程问答 25 豆豆

生活随笔收集整理的這篇文章主要介紹了抓取L4d2地图信息，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

程序用途：從 www.orangetage.com/map/ 獲取指定頁數的所有地圖的信息儲存到txt文件
效果演示

# -*- coding: utf-8 -*-
# Version: Python 3.9.5
# Author: TRIX
# Date: 2021-09-07 21:22:40
# Use: fetch the info of every map listed on the requested page(s) of
#      http://www.orangetage.com/map/ and save it to a txt file
from logging import basicConfig, DEBUG, debug, CRITICAL, disable  # Import debugging module
# disable(CRITICAL)  # Remove # when the program is completed
basicConfig(level=DEBUG, format='%(levelname)s: %(message)s. [%(lineno)d]%(filename)s <%(asctime)s>', filename='debug.log', filemode='w')  # Set debugging mode. Replace print() with debug()

import requests
import bs4

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
_REQUEST_TIMEOUT = 10


def _parse_pages(page):
    """Expand a page spec such as '15' or '12-32' into ascending page numbers."""
    if '-' in page:
        bounds = page.split('-')
        first, last = int(bounds[0]), int(bounds[-1])
        # Inclusive range in ascending order, whichever way round it was typed.
        return list(range(min(first, last), max(first, last) + 1))
    return [int(page)]


def _page_url(number):
    """Return the listing URL for a page number (page 1 is index.html)."""
    if number == 1:
        return 'http://www.orangetage.com/map/index.html'
    return 'http://www.orangetage.com/map/{}.html'.format(number)


def _fetch_html(url):
    """Download *url* and return its body decoded as GBK."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # The original author checked apparent_encoding and found the site is GBK.
    response.encoding = 'gbk'
    return response.text


def get_html(page):
    """Return the HTML source of every requested listing page.

    Args:
        page: page spec as a string, either a single page ('15') or an
            inclusive range ('12-32').

    Returns:
        A list of HTML strings, one per page, in ascending page order.

    Raises:
        ValueError: if the spec does not contain valid integers.
        requests.RequestException: if a download fails or times out.
    """
    return [_fetch_html(_page_url(number)) for number in _parse_pages(page)]


def get_map(page):
    """Collect info and download links of every map on the requested pages.

    The result is written to 'l4d2_maps_info.txt' (UTF-8) in the current
    working directory; nothing is returned.

    Args:
        page: page spec as a string, either a single page ('15') or an
            inclusive range ('12-32').
    """
    page_numbers = _parse_pages(page)
    listing_pages = get_html(page)

    # (page number, 1-based position on that page, map URL) for every map,
    # so entry headings stay correct however many maps a page lists.
    map_refs = []
    for page_number, listing_html in zip(page_numbers, listing_pages):
        # Map links are the <a> directly inside <div class="list_img">.
        anchors = bs4.BeautifulSoup(listing_html, 'lxml').select('div[class="list_img"] > a')
        position = 0
        for anchor in anchors:
            href = anchor.get('href')
            if not href:
                continue  # an <a> without href cannot be followed
            position += 1
            map_refs.append((page_number, position, href))

    entries = []
    for page_number, position, map_url in map_refs:
        # Parse each map page once and reuse the tree for both queries.
        soup = bs4.BeautifulSoup(_fetch_html(map_url), 'lxml')
        parts = ['------第{}頁-第{}個地圖------\n'.format(page_number, position)]

        # Map description.
        # NOTE(review): confirm the font name in this selector matches the
        # page's actual style attribute byte-for-byte; an attribute-equality
        # selector matches nothing otherwise.
        info_tags = soup.select('span[style="font-family:微軟雅黑;"]')
        # The heading is only added when there is not exactly one matching
        # span - presumably a lone span already carries its own heading;
        # verify against a real page.
        if len(info_tags) != 1:
            parts.append('地圖簡介：')
        for info_tag in info_tags:
            parts.append(info_tag.text.replace('\r\n', '\n') + '\n')

        # Download links: <ul class="l xz_a wrap blue"> > <li> > <a>.
        for link in soup.select('ul[class="l xz_a wrap blue"] > li > a'):
            parts.append(link.text + '：' + (link.get('href') or '') + '\n')

        parts.append('\n\n')
        entries.append(''.join(parts))

    with open('l4d2_maps_info.txt', 'w', encoding='utf-8') as f:
        f.write('在第{}頁一共找到{}個地圖\n\n\n'.format(page, len(map_refs)))
        f.writelines(entries)


if __name__ == '__main__':
    get_map('18-25')

總結

以上是生活随笔為你收集整理的抓取L4d2地图信息的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： FRM 数量分析笔记之概率分布
下一篇： IT猿助手宝宝取名助手 App Te