#coding:u8
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import requests
import base64
import re
import time
import os
import _thread


def getImage(name):
    """Endlessly download 12306 login captcha images through a Chrome session.

    Opens the 12306 login page in a new Chrome window, then loops forever:
    switches to the account-login tab, waits for the captcha ``<img>``
    (id ``J-loginImg``), decodes the base64 payload of its ``src`` attribute
    and writes it to ``verify/<name><count>.jpg``, then refreshes the page to
    obtain a new captcha.

    Args:
        name: Label for the calling worker. Used as the file-name prefix of
            every saved image and as the name of a directory that is created
            in the current working directory.

    This function only returns by raising: errors while fetching or saving
    an image are reported and the loop continues; any other error (for
    example the login tab not being found) propagates after the browser has
    been closed.
    """
    login_url = "https://kyfw.12306.cn/otn/resources/login.html"
    output_dir = "verify"

    # The images are written into "verify", so it has to exist; exist_ok
    # keeps this safe when several workers start at the same time.
    os.makedirs(output_dir, exist_ok=True)
    # The original script also creates a directory named after the worker;
    # kept so existing workflows that expect it still find it.
    os.makedirs(name, exist_ok=True)

    driver = webdriver.Chrome()
    try:
        driver.get(login_url)
        count = 0
        while True:
            count += 1
            # find_element(By...) works on both Selenium 3 and 4; the
            # find_element_by_* helpers were removed in Selenium 4.3.
            driver.find_element(By.CLASS_NAME, "login-hd-account").click()
            try:
                # Give the page a moment to swap in the captcha image.
                time.sleep(2)
                img_element = WebDriverWait(driver, 100).until(
                    EC.presence_of_element_located((By.ID, "J-loginImg"))
                )
                # src is read as "<prefix>,<base64 data>": keep what follows
                # the last comma.
                base64_str = img_element.get_attribute("src").split(",")[-1]
                imgdata = base64.b64decode(base64_str)
                pname = os.path.join(output_dir, name + str(count) + ".jpg")
                print(pname)
                with open(pname, "wb") as fh:
                    fh.write(imgdata)
            except Exception as e:
                print(u"網(wǎng)絡(luò)開小差,請稍后嘗試")
                # Show the real cause too; not every failure here is a
                # network problem (e.g. a file-system error).
                print(repr(e))
            driver.refresh()
    finally:
        # Do not leave a Chrome process behind when the loop dies.
        driver.quit()
# Script entry point: start four crawler threads, each running getImage with
# its own name, then keep the main thread alive.
print("start")
try:
    for thread_name in ("THREAD-1", "THREAD-2", "THREAD-3", "THREAD-4"):
        _thread.start_new_thread(getImage, (thread_name,))
except Exception as e:
    print("啟動失敗")
    print(repr(e))

# The main thread must not exit, otherwise the _thread workers are typically
# torn down with it. Sleep instead of spinning so the wait does not burn a
# full CPU core.
while 1:
    time.sleep(1)
爬蟲開了 4 個線程,將驗證圖片放入 4 個文件夾中,請自行歸併到同一個文件夾下,供切圖使用。

2、切圖
#coding:u8
import cv2
import os
# Slice every captcha image found in "verify" into one label strip (saved to
# 12306_dataset_word) and eight square picture tiles (saved to
# 12306_dataset_obj).
SOURCE_DIR = "verify"
OBJ_DIR = "12306_dataset_obj"
WORD_DIR = "12306_dataset_word"

for out_dir in (OBJ_DIR, WORD_DIR):
    os.makedirs(out_dir, exist_ok=True)

# The eight tiles form a 2 x 4 grid of 67 x 67 pixel squares; these are the
# top / left pixel offsets of the rows and columns.
TILE_SIZE = 67
ROW_TOPS = (41, 113)
COL_LEFTS = (5, 77, 149, 221)

for batch, file_name in enumerate(os.listdir(SOURCE_DIR)):
    print(u"處理第{:>9}張圖".format(batch + 1))
    im = cv2.imread(os.path.join(SOURCE_DIR, file_name), 1)
    if im is None:
        # cv2.imread returns None (it does not raise) when a file cannot be
        # decoded; skip it instead of crashing on the slicing below.
        continue

    # Label strip: rows 0-24, columns 120-289 of the captcha.
    cv2.imwrite("12306_dataset_word/{}.jpg".format(batch + 1), im[0:25, 120:290])

    # Tiles in row-major order, numbered batch*8+1 .. batch*8+8 so that
    # output names never collide between source images.
    tiles = [
        im[top:top + TILE_SIZE, left:left + TILE_SIZE]
        for top in ROW_TOPS
        for left in COL_LEFTS
    ]
    for i, tile in enumerate(tiles, start=1):
        cv2.imwrite("12306_dataset_obj/{}.jpg".format(batch * 8 + i), tile)