以極速漫畫為例
http://www.1kkk.com/
這類網站的驗證碼需要點擊旋轉圖片才能登錄。
筆者提出的思路簡單暴力:直接想辦法獲得所有方向正確的圖片,然後在登錄時爬取這 4 張動漫圖片,做旋轉對比得出點擊次數。
查看網頁數據加載可以看到,每次點擊“換一組”就會加載新的圖片。查看圖片加載鏈接 http://www.1kkk.com/image3.ashx?t=1537269577000 ,去掉多餘的參數(時間戳):
- 訪問 http://www.1kkk.com/image3.ashx
- 每次訪問都將獲得不同的圖片,這些就是網站的驗證碼圖片
- 筆者循環訪問 http://www.1kkk.com/image3.ashx 500 次,拿到 500 張大圖(每張大圖包含 16 張小圖)
- 循環爬取 500 張圖片
import os

import requests

headers = {"User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)"}


def pic_data(count=500, out_dir='./image', timeout=10):
    """Download captcha sheets and store them as numbered PNG files.

    The endpoint is requested ``count`` times; response ``i`` is written to
    ``<out_dir>/<i>.png`` (numbering starts at 0) and ``i`` is printed as a
    progress indicator.

    :param count: number of requests to make / files to write.
    :param out_dir: directory receiving the files; created when missing.
    :param timeout: per-request timeout in seconds, so a stalled connection
        cannot hang the loop forever.
    :raises requests.RequestException: on connection problems, timeouts or
        a non-2xx response.
    """
    url = 'http://www.1kkk.com/image3.ashx'
    os.makedirs(out_dir, exist_ok=True)
    for i in range(count):
        response = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly instead of saving an HTML error page under a .png name.
        response.raise_for_status()
        filename = os.path.join(out_dir, '%s.png' % i)
        with open(filename, 'wb') as f:
            f.write(response.content)
        print(i)


if __name__ == '__main__':
    pic_data()
- 使用 Image 模塊中的 crop 方法來切割圖片,使每一種頭像都得到一張圖片
- 循環 500 張大圖得到 2000 張小圖(每張大圖只切割出第一排的 4 張)
import os

from PIL import Image

# Edge length in pixels of one square tile cut from a sheet.
TILE_SIZE = 76
# Number of tiles taken from the top row of each sheet.
TILES_PER_ROW = 4


def crop_first_rows(src_dir='./image', dst_dir='./images', count=500):
    """Cut the top row of every downloaded sheet into separate tile images.

    Sheets ``0.png`` .. ``<count - 1>.png`` are read from ``src_dir``; each
    yields ``TILES_PER_ROW`` tiles of ``TILE_SIZE`` x ``TILE_SIZE`` pixels,
    saved to ``dst_dir`` under a running number starting at 0.

    Source and destination are deliberately different directories: both use
    plain numeric file names, so writing tiles next to the sheets would
    overwrite sheets that have not been processed yet.

    :param src_dir: directory holding the downloaded sheets.
    :param dst_dir: directory receiving the tiles; created when missing.
    :param count: number of sheets to process.
    :return: total number of tiles written.
    """
    os.makedirs(dst_dir, exist_ok=True)
    y = 0
    for i in range(count):
        with Image.open(os.path.join(src_dir, str(i) + '.png')) as im:
            for column in range(TILES_PER_ROW):
                left = column * TILE_SIZE
                # crop() takes a (left, upper, right, lower) box.
                region = im.crop((left, 0, left + TILE_SIZE, TILE_SIZE))
                region.save(os.path.join(dst_dir, str(y) + '.png'))
                y += 1
        print(y)
    return y


if __name__ == '__main__':
    crop_first_rows()
- crop 的坐標順序為(左,上,右,下);因為大圖是 304*304 像素,切割成的小圖就是 76*76
- 得到 2000 張小圖後,其中必定有相同的圖片
- 去重:使用 VisiPics 軟件去除相同的圖片(筆者的 2000 張圖去重後剩下 518 張)
- https://jingyan.baidu.com/article/215817f7e30cbd1eda14238e.html
- 剩下的圖片中還是有相同的,它們有可能只是方向不同
- 這樣的圖片實際上還是同一張,還需要進一步去重
- 使用直方圖去重
- 參考了 http://blog.sina.com.cn/s/blog_70cff2d70102wfgz.html 和 https://blog.csdn.net/sinat_35059932/article/details/60580536
- 重命名圖片,方便去重(強迫症)
import os


def renumber_images(directory='./images', start=1):
    """Rename the numbered PNG files in ``directory`` to a gap-free sequence.

    Every regular file called ``<digits>.png`` is taken in ascending numeric
    order and renamed to ``<start>.png``, ``<start + 1>.png`` and so on.
    Other files are left alone. The directory is scanned rather than probed
    with a fixed range, so ``0.png`` and numbers of any size are included.

    :param directory: directory holding the images.
    :param start: number given to the first file.
    :return: how many numbered images the directory contains.
    """
    if not os.path.isdir(directory):
        return 0

    numbered = []
    for name in os.listdir(directory):
        stem, ext = os.path.splitext(name)
        is_numbered_png = ext == '.png' and stem.isdecimal()
        if is_numbered_png and os.path.isfile(os.path.join(directory, name)):
            numbered.append((int(stem), name))
    numbered.sort()

    # Rename in two passes through temporary names: a target number may still
    # be occupied by a file that has not been moved yet (e.g. 0.png -> 1.png
    # while the old 1.png is still in place).
    staged = []
    for offset, (_, name) in enumerate(numbered):
        target = '%d.png' % (start + offset)
        if name == target:
            continue
        temp_path = os.path.join(directory, '.renumber-%d.tmp' % offset)
        os.rename(os.path.join(directory, name), temp_path)
        staged.append((temp_path, os.path.join(directory, target)))
    for temp_path, final_path in staged:
        os.rename(temp_path, final_path)
    return len(numbered)


if __name__ == '__main__':
    renumber_images()
- 去重
import math
import operator
import os
from functools import reduce

from PIL import Image


def histogram_rms(h1, h2):
    """Return the root-mean-square difference between two histograms.

    Squared differences are summed pairwise (stopping at the shorter
    sequence) and divided by ``len(h1)`` before taking the square root.
    """
    squared = map(lambda a, b: (a - b) ** 2, h1, h2)
    return math.sqrt(reduce(operator.add, squared) / len(h1))


def load_histograms(directory, first, last):
    """Read the histogram of every readable ``<n>.png`` with first <= n <= last.

    :return: dict mapping image number to its histogram; numbers whose file
        is missing or cannot be read as an image are left out.
    """
    histograms = {}
    for index in range(first, last + 1):
        path = os.path.join(directory, str(index) + '.png')
        try:
            with Image.open(path) as im:
                histograms[index] = im.histogram()
        except OSError:
            # Missing or unreadable file: there is simply no image with
            # this number.
            continue
    return histograms


def remove_histogram_duplicates(directory='./images', last=518, threshold=7):
    """Delete images whose histogram is nearly identical to an earlier one.

    Images ``1.png`` .. ``<last>.png`` are compared pairwise. When the RMS
    histogram difference is below ``threshold`` the higher-numbered file is
    removed from disk and takes no further part in the comparison.

    Each histogram is read once up front instead of re-opening both files
    for every pair.

    :param directory: directory holding the numbered images.
    :param last: highest image number to consider.
    :param threshold: RMS difference below which two images count as equal.
    """
    histograms = load_histograms(directory, 1, last)
    for i in range(1, last + 1):
        h1 = histograms.get(i)
        if h1 is None:
            continue
        for j in range(i + 1, last + 1):
            h2 = histograms.get(j)
            if h2 is None:
                continue
            result = histogram_rms(h1, h2)
            print(i, j)
            if result < threshold:
                print('-----------------------------')
                print(i, j)
                print(result)
                os.remove(os.path.join(directory, str(j) + '.png'))
                # Forget the deleted image so it is not compared again.
                del histograms[j]


if __name__ == '__main__':
    remove_histogram_duplicates()
- 使用直方圖去重後還是有相同的圖片(筆者還剩 221 張),這時就需要手動將圖片方向矯正,以便之後的對比及再次去重
- 將圖片矯正後再次使用 VisiPics 軟件去重,筆者還剩 151 張圖,這些圖應該就是網站數據庫中的圖了。
- 再次按順序重新命名這些圖片,方便對比時的循環(也可以使用 os 模塊獲取文件名)
- 重命名
import os


def renumber_images(directory='./images', start=1):
    """Rename the numbered PNG files in ``directory`` to a gap-free sequence.

    Every regular file called ``<digits>.png`` is taken in ascending numeric
    order and renamed to ``<start>.png``, ``<start + 1>.png`` and so on.
    Other files are left alone. The directory is scanned rather than probed
    with a fixed range, so ``0.png`` and numbers of any size are included.

    :param directory: directory holding the images.
    :param start: number given to the first file.
    :return: how many numbered images the directory contains.
    """
    if not os.path.isdir(directory):
        return 0

    numbered = []
    for name in os.listdir(directory):
        stem, ext = os.path.splitext(name)
        is_numbered_png = ext == '.png' and stem.isdecimal()
        if is_numbered_png and os.path.isfile(os.path.join(directory, name)):
            numbered.append((int(stem), name))
    numbered.sort()

    # Rename in two passes through temporary names: a target number may still
    # be occupied by a file that has not been moved yet (e.g. 0.png -> 1.png
    # while the old 1.png is still in place).
    staged = []
    for offset, (_, name) in enumerate(numbered):
        target = '%d.png' % (start + offset)
        if name == target:
            continue
        temp_path = os.path.join(directory, '.renumber-%d.tmp' % offset)
        os.rename(os.path.join(directory, name), temp_path)
        staged.append((temp_path, os.path.join(directory, target)))
    for temp_path, final_path in staged:
        os.rename(temp_path, final_path)
    return len(numbered)


if __name__ == '__main__':
    renumber_images()
import math
import operator
import os
import time
from functools import reduce
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class CrackGeetest():
    """Drive Chrome through the login dialog of http://www.1kkk.com/.

    The four captcha tiles are cropped out of a page screenshot and compared
    against the reference images stored as ``./images/<n>.png`` to work out
    how many times each tile has to be clicked before the login form is
    submitted.
    """

    # Absolute XPath of a captcha tile; the placeholder is the 1-based
    # position of the tile's <div> among its siblings.
    _TILE_XPATH = '/html/body/section[3]/div/div/div/div/div/div[%d]'

    def __init__(self):
        self.url = 'http://www.1kkk.com/'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 5)

    def __del__(self):
        # __init__ may have failed before the browser attribute existed.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() would only close
            # the current window and leave the driver process running.
            browser.quit()

    def get_geetest_button(self):
        """Click the avatar image in the page header to open the login dialog."""
        button = self.wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/header/div/div[2]/a/img')))
        button.click()

    def get_position(self, a):
        """Locate captcha tile ``a`` and return its bounding box on the page.

        The located element is also stored as ``self.img<a>`` so that it can
        be clicked later.

        :param a: tile number, 1 to 4.
        :return: tuple ``(top, bottom, left, right)``; the tile is taken to
            be 76 x 76 pixels.
        :raises ValueError: if ``a`` is not a valid tile number.
        """
        if a not in (1, 2, 3, 4):
            raise ValueError('captcha tile number must be 1-4, got %r' % (a,))
        time.sleep(1)
        # Tile n is the (n + 1)-th <div> of the captcha container.
        xpath = self._TILE_XPATH % (a + 1)
        element = self.wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
        setattr(self, 'img%d' % a, element)
        location = element.location
        size = {'height': 76, 'width': 76}
        top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the page and return it as a PIL image."""
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        return screenshot

    def get_geetest_image(self, name='captcha.png', sum='0', screensho=''):
        """Crop one captcha tile out of a screenshot and save it to disk.

        :param name: file name the cropped tile is saved under.
        :param sum: tile number (1 to 4) passed on to ``get_position``. The
            default is not a valid tile number, so callers must supply one.
        :param screensho: PIL image of the page, as returned by
            ``get_screenshot``.
        :return: the cropped tile as a PIL image.
        """
        top, bottom, left, right = self.get_position(sum)
        print('驗(yàn)證碼位置', top, bottom, left, right)
        captcha = screensho.crop((left, top, right, bottom))
        print(left, top, right, bottom)
        captcha.save(name)
        return captcha

    def open(self):
        """Load the site's start page in the browser."""
        self.browser.get(self.url)

    def is_pixel(self, image1, image2):
        """Compare the central region of two images pixel by pixel.

        :param image1: first PIL image.
        :param image2: second PIL image.
        :return: True when, for every pixel with x and y in 10..59, each of
            the first three channels differs by less than 100; otherwise
            False.
        """
        # Fetch the pixel-access objects once instead of once per pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        threshold = 100
        for x in range(10, 60):
            for y in range(10, 60):
                pixel1 = pixels1[x, y]
                pixel2 = pixels2[x, y]
                # All three channels must be close; a single channel that is
                # off by the threshold or more rejects the match.
                if any(abs(pixel1[channel] - pixel2[channel]) >= threshold for channel in range(3)):
                    return False
        return True

    def pic_contrast(self, auth_code1, auth_code2, im):
        """Find the rotation at which a captcha tile matches a reference image.

        The tile is tried at 0, 90, 180 and 270 degrees (as rotated by
        ``Image.rotate``). On the first match the click count
        ``(4 - angle) % 4`` is appended to ``im``.
        NOTE(review): this presumes every click turns the tile by a quarter
        turn in the direction opposite to ``Image.rotate`` - confirm on the
        site.

        :param auth_code1: tile cropped from the website.
        :param auth_code2: reference image.
        :param im: list collecting the click counts.
        :return: True if a matching rotation was found, else False.
        """
        for angle in range(4):
            if self.is_pixel(auth_code1.rotate(90 * angle), auth_code2):
                clicks = (4 - angle) % 4
                print('現(xiàn)在旋轉(zhuǎn)角度:%d' % (90 * angle))
                im.append(clicks)
                print('點(diǎn)擊次數(shù):%d' % clicks)
                print('------------------------')
                return True
        return False

    @staticmethod
    def _histogram_rms(h1, h2):
        """Return the root-mean-square difference between two histograms."""
        squared = map(lambda a, b: (a - b) ** 2, h1, h2)
        return math.sqrt(reduce(operator.add, squared) / len(h1))

    @staticmethod
    def _load_references(directory='./images'):
        """Load the reference images ``<directory>/1.png`` .. ``<N>.png``.

        ``N`` is the number of entries in the directory. Files that are
        missing or unreadable are skipped.

        :return: list of ``(number, image, histogram)`` tuples.
        """
        references = []
        # + 1 so that the highest-numbered image is included as well.
        for j in range(1, len(os.listdir(directory)) + 1):
            try:
                image = Image.open(os.path.join(directory, str(j) + '.png'))
                references.append((j, image, image.histogram()))
            except OSError:
                continue
        return references

    def click_code(self, im):
        """Work out the click count for each of the four saved captcha tiles.

        Tiles are read from ``1.png`` .. ``4.png`` in the working directory.
        A reference whose histogram RMS difference is below 20 is compared
        in detail with ``pic_contrast``. Exactly one entry per tile is
        appended to ``im``: the click count of the first reference that
        matches, or 0 when the tile cannot be read or nothing matches, so
        that ``im[n]`` always belongs to tile ``n + 1``.

        :param im: list collecting the click counts.
        """
        references = self._load_references('./images')
        for i in range(1, 5):
            try:
                auth_code1 = Image.open(str(i) + '.png')
                h1 = auth_code1.histogram()
            except OSError:
                im.append(0)
                continue
            for j, auth_code2, h2 in references:
                if self._histogram_rms(h1, h2) < 20:
                    print('--------------------------')
                    print(i, j)
                    if self.pic_contrast(auth_code1, auth_code2, im):
                        break
            else:
                # No reference matched at any rotation: leave the tile alone.
                im.append(0)

    def set_name_passwd(self, name, passwd):
        """Fill in the account name and password, then press the login button.

        :param name: account name.
        :param passwd: password.
        """
        username = self.wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/section[3]/div/div/div/div/p[2]/input')))
        password = self.wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/section[3]/div/div/div/div/p[3]/input')))
        username.clear()
        username.send_keys(name)
        password.clear()
        password.send_keys(passwd)
        time.sleep(2)
        login = self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="btnLogin"]')))
        login.click()
        time.sleep(10)

    def crack(self, name, passwd):
        """Run the whole flow: open the dialog, turn the tiles, log in.

        :param name: account name.
        :param passwd: password.
        """
        self.open()
        self.get_geetest_button()
        time.sleep(1)
        screenshot = self.get_screenshot()
        # Save the four tiles as 1.png .. 4.png; this also records the tile
        # elements as self.img1 .. self.img4.
        for number in range(1, 5):
            self.get_geetest_image('%d.png' % number, number, screenshot)
        im = []
        img = [self.img1, self.img2, self.img3, self.img4]
        self.click_code(im)
        print(im)
        for element, clicks in zip(img, im):
            for _ in range(clicks):
                element.click()
                time.sleep(2)
        self.set_name_passwd(name=name, passwd=passwd)


if __name__ == '__main__':
    crack = CrackGeetest()
    crack.crack(name='yang', passwd='123456')
- 使用筆者的代碼時請注意代碼層級(縮進)
總結(jié)
以上是生活随笔為你收集整理的《点击旋转验证码破解》的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站的內容還不錯,歡迎將生活随笔推薦給好友。