python-css反爬之svg映射
生活随笔
收集整理的這篇文章主要介紹了
python-css反爬之svg映射
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
目標網站:http://www.porters.vip/confusion/food.html
詳細解析可查看:https://blog.csdn.net/BigBoy_Coder/article/details/104748253
"""Scrape the food demo page whose digits are hidden behind a CSS/SVG mapping.

The page renders each obfuscated digit as an empty ``<d class="...">`` tag.
The class's CSS ``background`` offset selects one glyph inside an SVG image
made of rows of digits.  This script rebuilds the class -> digit table from
the CSS and SVG files, substitutes the digits back into the HTML and prints
one line per restaurant card.
"""
import re

import requests
from parsel import Selector

# Seconds to wait for each HTTP response.  requests has no default timeout,
# so without this a stalled server would block the script forever.
_REQUEST_TIMEOUT = 10


def _fetch_text(url):
    """Download *url* and return the decoded body, raising on HTTP errors."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def _build_mapping(svg_text, css_text):
    """Return ``{css_class: digit}`` computed from the SVG and CSS sources.

    Raises:
        ValueError: if the SVG ``<style>`` block declares no ``font-size``.
    """
    # Each <text> row contributes (y, digits),
    # e.g. ('38', '154669136497975167479825383996313925720573').
    # Building a dict keeps the last row when two rows share the same y.
    row_digits = dict(
        re.findall(r'<text x=".*?" y="(.*?)">(.*?)</text>', svg_text)
    )
    # Sort by numeric y so row lookup does not depend on document order.
    rows = sorted((int(y), digits) for y, digits in row_digits.items())

    # Each CSS rule contributes (class, x, y), e.g. ('vhk08k', '274', '141').
    offsets = {
        class_name: (int(x), int(y))
        for class_name, x, y in re.findall(
            r'\.(\w{6}) \{\s+background: -(.*?)px -(.*?)px;\s+\}', css_text
        )
    }

    size_match = re.search(r'<style>.*?font-size:(\d+)px.*?</style>', svg_text)
    if size_match is None:
        raise ValueError('font-size not found in the SVG <style> block')
    font_size = int(size_match.group(1))  # 14 on the target site

    mapping = {}
    for class_name, (x_offset, y_offset) in offsets.items():
        # The glyph lives in the first row whose y lies below the y offset.
        digits = next(
            (text for row_y, text in rows if y_offset < row_y), None
        )
        if digits is None:
            # No row lies below this offset: leave the class unmapped rather
            # than guessing a digit from an unrelated row.
            continue
        # Horizontal offset in pixels -> character index within the row.
        index = x_offset // font_size
        # Slicing (not indexing) yields '' when the index is past the row end.
        mapping[class_name] = digits[index:index + 1]
    return mapping


def getSvgMapping():
    """Fetch the site's SVG and CSS files and return the class -> digit table.

    Returns:
        dict mapping each six-character CSS class to the single digit it
        displays, e.g. ``{'vhk08k': '0', 'vhk6zl': '1', ...}``.

    Raises:
        requests.RequestException: on network failure or HTTP error status.
        ValueError: if the SVG does not declare a font size.
    """
    svg_url = 'http://www.porters.vip/confusion/font/food.svg'
    css_url = 'http://www.porters.vip/confusion/css/food.css'
    svg_text = _fetch_text(svg_url)
    css_text = _fetch_text(css_url)
    return _build_mapping(svg_text, css_text)


def spider():
    """Download the food page, restore its digits and print every card.

    Each printed line holds the title, the joined score fields, the address,
    the specialty text and the phone number of one ``col details`` card.
    """
    url = 'http://www.porters.vip/confusion/food.html'
    page = _fetch_text(url)

    # Replace every obfuscated placeholder tag in the whole document.
    for class_name, digit in getSvgMapping().items():
        page = page.replace(f'<d class="{class_name}"></d>', digit)

    score_paths = (
        './div[2]/span[2]/text()',
        './div[2]/span[3]/text()',
        './div[2]/span[4]/span[1]/text()',
        './div[2]/span[4]/span[2]/text()',
        './div[2]/span[4]/span[3]/text()',
    )
    for card in Selector(text=page).xpath('//div[@class="col details"]'):
        # default='' keeps a card without title text from crashing on .strip().
        title = card.xpath('./div[1]/text()').extract_first(default='').strip()
        score = [
            text for path in score_paths for text in card.xpath(path).extract()
        ]
        address = ''.join(card.xpath('./div[3]/span/text()').extract())
        characteristic = card.xpath('./div[4]/span/text()').extract_first()
        phonenum = ''.join(card.xpath('./div[5]//text()').extract())
        print(title, ' '.join(score), address, characteristic, phonenum)


if __name__ == '__main__':
    spider()
總結
以上是生活随笔為你收集整理的python-css反爬之svg映射的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 2019年末逆向复习系列之Boss直聘C
- 下一篇: python调用其它文件函数或类