日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

爬虫实战:12306登录

發布時間:2023/12/20 编程问答 32 豆豆
生活随笔 收集整理的這篇文章主要介紹了 爬虫实战:12306登录,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。

爬蟲實戰:破解點觸驗證碼,實現12306登錄

1.目標

實現12306登錄,獲取登錄cookies

2.技術點

  • 1.借用第三方打碼平臺,進行圖片驗證碼識別
  • 2.破解selenium webdriver反爬

3.思路

  • 1.輸入賬號密碼
  • 2.獲取驗證圖片
  • 3.識別圖片,獲取坐標
  • 4.圖片驗證
  • 5.登錄
  • 6.滑動滑塊

4.環境

python + selenium + 超級鷹

5.代碼

  • 1.12306登錄.py
# @author: zly
# @function: Touch verification code
# @time: 2020-09-15
# @copyright: All Rights Reserved

import random
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

from chaojiying import Chaojiying_Client
from constants import *


class MakeTrack:
    """Generate a list of horizontal offsets that add up to a drag distance.

    :param distance: total distance the slider must travel (defaults to the
        DISTANCE constant).
    """

    # Upper bound on how many times a too-long track is regenerated, so an
    # unreachable TRACKMAXLENGTH cannot cause unbounded recursion/looping.
    MAX_ATTEMPTS = 10

    def __init__(self, distance=DISTANCE):
        self.distance = distance

    def segmentate(self, s):
        """Split one displacement into several unequal pieces.

        The pieces are deliberately uneven and are NOT guaranteed to add up
        to ``s`` (the two-piece case sums to ``2 * round(s / 3)``);
        ``make_track`` compensates for the difference afterwards.

        :param s: displacement to split --> int
        :return: list of piece sizes
        """
        if SEGMENTNUM1 <= abs(s) < SEGMENTNUM2:
            s = [round(s / 3) - 3, round(s / 3) + 3]
        elif abs(s) >= SEGMENTNUM2:
            s = [
                round(s / 5) - 5,
                round(s / 5) - 3,
                round(s / 5),
                round(s / 5) + 3,
                round(s / 5) + 5,
            ]
        else:
            s = [round(s)]
        return s

    def make_track(self):
        """Build a sliding track that imitates a human drag.

        A track longer than TRACKMAXLENGTH is regenerated (the original
        discarded the regenerated track and returned the long one). After
        MAX_ATTEMPTS tries the last track is returned as-is.

        :return: list of offsets whose sum equals ``self.distance``
        """
        track = self._build_track()
        for _ in range(self.MAX_ATTEMPTS - 1):
            if len(track) <= TRACKMAXLENGTH:
                break
            track = self._build_track()
        return track

    def _build_track(self):
        """Generate one candidate track summing exactly to ``self.distance``."""
        track = []
        current = v0 = 0
        while self.distance > current:
            # Random duration and random acceleration give a random step.
            t = random.randint(1, 4) / 2
            a = random.randint(1, 3)
            # Update velocity, then compute the displacement of this step.
            v0 += a * t
            s = v0 * t + 0.5 * a * t ** 2
            # Break large displacements into smaller uneven pieces.
            track.extend(self.segmentate(round(s)))
            current += s
        # Compensate: drop trailing pieces while overshooting, then append
        # whatever is still missing. Guarding on ``track`` avoids popping
        # from an empty list when the distance is negative.
        while track and sum(track) > self.distance:
            track.pop()
        shortfall = self.distance - sum(track)
        if shortfall:
            track.append(shortfall)
        return track


class Login12306(Chaojiying_Client):
    """Log in to 12306 by solving the click captcha through Chaojiying.

    :param username: 12306 account --> str
    :param password: 12306 password --> str
    :param cusername: Chaojiying account --> str
    :param cpassword: Chaojiying password --> str
    :param soft_id: software ID --> str
    :param codetype: captcha type --> int
    :param path: path where the captcha screenshot is saved --> str

    The settings are resolved in this order:
    1. a ``configs`` dict passed as a keyword argument,
    2. the module constants (used when USERNAME is non-empty),
    3. the positional/keyword arguments above.

    Constructing an instance immediately starts the login flow.
    """

    def __init__(self, username=None, password=None,
                 cusername=None, cpassword=None, soft_id=None,
                 codetype=None, path=None,
                 *args, **kwargs):
        # The original used kwargs.get('configs', 'None'), whose string
        # default is always truthy and led to a KeyError without configs.
        configs = kwargs.get('configs')
        if configs:
            username = configs.get('username', '')
            password = configs.get('password', '')
            cusername = configs.get('cusername', '')
            cpassword = configs.get('cpassword', '')
            soft_id = configs.get('soft_id', '')
            codetype = configs.get('codetype', '')
            path = configs.get('path', '')
        elif USERNAME:
            username = USERNAME
            password = PASSWORD
            cusername = CUSERNAME
            cpassword = CPASSWORD
            soft_id = SOFTID
            codetype = CODETIPE
            path = PATH

        # Connect to Chaojiying for every configuration source (the
        # constants branch used to skip this, leaving the client unusable).
        super().__init__(username=cusername,
                         password=cpassword,
                         soft_id=soft_id)

        # Assigned after super().__init__ on purpose: the parent stores the
        # Chaojiying credentials in the same attribute names, but it has
        # already copied them into base_params by this point.
        self.username = username
        self.password = password
        self.cusername = cusername
        self.cpassword = cpassword
        self.soft_id = soft_id
        self.codetype = codetype
        self.path = path
        # ``run`` is a property; accessing it executes the login flow.
        self.run

    @property
    def run(self):
        """Run the whole login flow.

        :return: True when login verification succeeded, False when it
            failed (including when the captcha could not be recognised
            after one retry).
        """
        self.driver = self.prepares()
        self.driver.get('https://kyfw.12306.cn/otn/resources/login.html')
        self.driver.implicitly_wait(IMPLICITLYWAIT)
        self.driver.maximize_window()
        time.sleep(1)

        # 1. Enter account and password
        self.input_user_pwd(username=self.username, password=self.password)
        # 2. Capture the captcha image
        self.get_pic()

        while True:
            # 3. Recognise the image and get the click coordinates
            position, pic_id = self.get_position(codetype=self.codetype)
            if not position:
                position, pic_id = self.get_position(codetype=self.codetype)
            if not position:
                # Recognition failed twice; clicking ``None`` would raise a
                # TypeError, so give up cleanly instead.
                self.driver.quit()
                return False

            # 4. Click the captcha
            self.img_click(position)

            # 5. Log in; on captcha failure reload the form and try again
            if not self.login(pic_id):
                self.driver.refresh()
                self.input_user_pwd(username=self.username,
                                    password=self.password)
                self.get_pic()
                continue

            # 6. Drag the slider
            return bool(self.slide())

    def prepares(self):
        """Create a Chrome driver with its automation markers hidden.

        12306 still refuses the login after the image captcha passes when
        it detects webdriver. The options below remove the "controlled by
        automated test software" switches, and the injected script makes
        ``navigator.webdriver`` evaluate to ``undefined`` on every new
        document.

        :return: the configured Chrome webdriver
        """
        options = webdriver.ChromeOptions()
        options.add_experimental_option("excludeSwitches",
                                        ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        driver = webdriver.Chrome(options=options)
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator, 'webdriver', "
                       "{get: () => undefined})"})
        return driver

    def input_user_pwd(self, username=None, password=None):
        """Switch to account login and type the 12306 credentials.

        :param username: 12306 account --> str, default is None
        :param password: 12306 password --> str, default is None
        :return: 0; the value carries no meaning
        """
        # Switch to the account/password login tab
        self.driver.find_element(
            By.XPATH, '//li[@class="login-hd-account"]/a').click()
        # Give the page's JavaScript time to load; without this pause the
        # following lookups fail.
        time.sleep(2)
        # Type the account and password
        self.driver.find_element(By.ID, 'J-userName').send_keys(username)
        self.driver.find_element(By.ID, 'J-password').send_keys(password)
        return 0

    def get_pic(self):
        """Save a screenshot of the captcha image to ``self.path``.

        :return: 0; the value carries no meaning
        """
        self.driver.find_element(By.ID, 'J-loginImg').screenshot(self.path)
        return 0

    def get_position(self, codetype=None):
        """Send the captcha to Chaojiying and parse the click coordinates.

        :param codetype: captcha type --> int, default is None
        :return: ``[position, pic_id]`` where ``position`` is a nested list
            such as ``[['55', '55'], ['88', '88']]``, or None when
            recognition failed
        """
        verify_data = self.PostPic(self.path, codetype)
        print(verify_data)
        if verify_data['err_no'] == 0:
            # 'pic_str' looks like "x1,y1|x2,y2"; split into [x, y] pairs.
            points = verify_data['pic_str'].split('|')
            position = [point.split(',') for point in points]
            return [position, verify_data['pic_id']]
        self.ReportError(verify_data['pic_id'])
        return [None, verify_data['pic_id']]

    def img_click(self, position):
        """Click the captcha image at each recognised coordinate.

        :param position: click coordinates --> nested list,
            [['55', '55'], ['88', '88']...]
        :return: 0; the value carries no meaning
        """
        element = self.driver.find_element(By.ID, 'J-loginImg')
        for point in position:
            # move_to_element_with_offset only accepts int offsets.
            # NOTE(review): newer Selenium 4 releases appear to measure the
            # offset from the element's centre instead of its top-left
            # corner - confirm against the installed version.
            x = int(point[0])
            y = int(point[1])
            ActionChains(self.driver).move_to_element_with_offset(
                element, x, y).click().perform()
        return 0

    def login(self, pic_id=None):
        """Press the login button and check the captcha result.

        :param pic_id: Chaojiying image ID, reported back when the captcha
            answer turns out to be wrong
        :return: True when the captcha was accepted, otherwise False
        """
        self.driver.find_element(By.ID, 'J-login').click()
        # The error label being visible means the captcha was rejected.
        verify_tag = self.driver.find_element(
            By.XPATH, '//*[@class="lgcode-error"]')
        if verify_tag.is_displayed():
            # Report the wrong answer to Chaojiying so it is not charged.
            self.ReportError(pic_id)
            print("圖片驗證失敗,報錯成功")
            return False
        print("圖片驗證成功")
        time.sleep(3)
        return True

    def slide(self):
        """Drag the slider and confirm the login.

        The browser is closed before returning on every path.

        :return: True when login succeeded, otherwise False
        """
        try:
            # Locate the slider handle
            element = self.driver.find_element(By.ID, 'nc_1_n1z')
            # Generate the track
            track = MakeTrack().make_track()
            # Drag along the track
            ActionChains(self.driver).click_and_hold(element).perform()
            for offset in track:
                ActionChains(self.driver).move_by_offset(offset, 0).perform()
            ActionChains(self.driver).release(element).perform()
            # How long this takes depends on network speed
            time.sleep(5)
        except Exception as e:
            # Typically "stale element reference": the page refreshed and
            # the element is gone. It does not happen every time.
            print(str(e))
            time.sleep(10)
            self.driver.quit()
            return False

        # Check whether the login succeeded
        try:
            self.driver.find_element(
                By.XPATH, '//*[@class="btn btn-primary ok"]').click()
            cookies = self.driver.get_cookies()
            print("恭喜您登陸成功")
            print(cookies)
            time.sleep(10)
            self.driver.quit()
            return True
        except Exception as e:
            print(str(e))
            print("恭喜您登陸失敗,再來一次吧")
            time.sleep(10)
            self.driver.quit()
            return False


configs = {
    'username': '',    # 12306 account
    'password': '',    # 12306 password
    'cusername': '',   # Chaojiying account
    'cpassword': '',   # Chaojiying password
    'soft_id': '',     # software ID
    'codetype': 9004,  # captcha type
    'path': ''         # path of the captcha screenshot
}
Login12306(configs=configs)
  • 2、chaojiying.py
from hashlib import md5

import requests


class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha-recognition service.

    :param username: Chaojiying account
    :param password: Chaojiying password in plain text; only its MD5 hex
        digest is stored and sent
    :param soft_id: software ID
    :param timeout: seconds to wait for each HTTP request before requests
        raises a timeout error (added so that a stalled connection cannot
        block forever; existing callers get the default)
    """

    def __init__(self, username, password, soft_id, timeout=60):
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Parameters sent with every request
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, path, codetype):
        """Upload an image for recognition.

        :param path: path of the image file
        :param codetype: question type, see
            http://www.chaojiying.com/price.html
        :return: the decoded JSON response
        """
        with open(path, 'rb') as f:
            imagecontent = f.read()
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', imagecontent)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php',
                          data=params, files=files, headers=self.headers,
                          timeout=self.timeout)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised image.

        :param im_id: image ID of the question being reported
        :return: the decoded JSON response
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php',
                          data=params, headers=self.headers,
                          timeout=self.timeout)
        return r.json()
  • 3、constants.py
# 12306 account
USERNAME = ''
# 12306 password
PASSWORD = ''

# Chaojiying account
CUSERNAME = ''
# Chaojiying password
CPASSWORD = ''
# Software ID
SOFTID = ''
# Captcha type (the misspelled name is kept because importers use it)
CODETIPE = ''

# Path of the captcha image
PATH = ''

# Distance the slider is dragged, unit: px
DISTANCE = 425

# Size thresholds used when splitting the track
SEGMENTNUM1 = 30
SEGMENTNUM2 = 50

# Maximum number of track segments
TRACKMAXLENGTH = 30

# Implicit wait time, unit: s
IMPLICITLYWAIT = 10

溫馨提示:千萬不要干壞事喲~~,否則抓進局里后果自負…

總結

以上是生活随笔為你收集整理的爬虫实战:12306登录的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。