當前位置：首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

机器学习实战案例—验证码（CAPTCHA）识别基于Logistic

發布時間：2024/3/13 编程问答 24 豆豆

生活随笔收集整理的這篇文章主要介紹了「机器学习实战案例—验证码（CAPTCHA）识别基于Logistic」，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

基于Logistic驗證碼識別

基于邏輯回歸（Logistic）圖像處理實現數字驗證碼的識別
- 一、準備數據（制作驗證碼）
- 二、對圖形進行處理
- 三、將數據帶入邏輯回歸進行建模
- 四、得到模型，進行圖像驗證
相關技術

基于邏輯回歸（Logistic）圖像處理實現數字驗證碼的識別

一、準備數據（制作驗證碼）

通過在畫布上繪製隨機數字（顏色隨機），再添加干擾元素（噪點、噪線）

"""Generate five-digit CAPTCHA images to be used as training data."""
import os
import random

from PIL import Image
from PIL import ImageDraw, ImageFont

# Directory that receives the generated "<label>.png" files.
OUTPUT_DIR = 'captcha_images/'


def getRandomColor():
    """Return a random ``(r, g, b)`` tuple that is never pure white.

    The canvas background is white, so a white glyph or dot would be
    invisible; white is therefore mapped to black.  (The previous
    implementation did the opposite of what its docstring promised: it
    turned black into white.)
    """
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    if (r, g, b) == (255, 255, 255):
        return (0, 0, 0)
    return (r, g, b)


def getRandmStr():
    """Return one random decimal digit as a one-character string."""
    return str(random.randint(0, 9))


def generate_captcha():
    """Draw a noisy five-digit CAPTCHA and save it as ``<digits>.png``.

    The image is written to ``captcha_images/`` and named after the digits
    it contains, so the file name doubles as the ground-truth label.  Two
    captchas that happen to draw the same digits overwrite each other.
    """
    # RGB canvas, 150 px wide and 50 px high, white background.
    image = Image.new('RGB', (150, 50), (255, 255, 255))
    draw = ImageDraw.Draw(image)
    # The font file must be resolvable by Pillow (font path / system fonts).
    font = ImageFont.truetype("LiberationSans-Bold.ttf", size=32)

    label = ""
    for i in range(5):
        random_char = getRandmStr()
        label += random_char
        # Arguments: position, text, colour, font.  Digits sit 30 px apart.
        draw.text((10 + i * 30, 0), random_char, getRandomColor(), font=font)

    # Noise is drawn inside a 150 x 30 region, i.e. the top 30 rows only.
    width = 150
    height = 30

    # Noise lines.
    for i in range(4):
        x1 = random.randint(0, width)
        x2 = random.randint(0, width)
        y1 = random.randint(0, height)
        y2 = random.randint(0, height)
        draw.line((x1, y1, x2, y2), fill=(0, 0, 0))

    # Noise dots, each followed by a small black arc.
    for i in range(5):
        draw.point(
            [random.randint(0, width), random.randint(0, height)],
            fill=getRandomColor(),
        )
        x = random.randint(0, width)
        y = random.randint(0, height)
        draw.arc((x, y, x + 4, y + 4), 0, 90, fill=(0, 0, 0))

    # Let Pillow open and close the file itself; passing an open() handle
    # that is never closed leaked one file descriptor per image.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    image.save(''.join([OUTPUT_DIR, label, '.png']), 'png')


if __name__ == '__main__':
    for i in range(150):
        generate_captcha()

二、對圖形進行處理

【1】二值化：將圖像從 RGB 3 通道轉化成 Gray 1 通道，然后將 Gray 灰度值（0～255）轉化為二值圖（0，1）
0：黑色 1：白色
【2】降噪：通過孤立點，對二值化的圖片進行降噪
【3】圖片切割：根據像素格進行切割

"""Binarize, denoise and slice CAPTCHA images into single-digit samples."""
import os

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

RAW_DIR = 'captcha_images/'
CLEAN_DIR = 'clean_captcha_img/'
CUT_DIR = 'cut_number/'

# Number of digits cut out of every cleaned captcha.
DIGIT_COUNT = 5


def _digit_box(i):
    """Return the ``(left, upper, right, lower)`` crop box of digit *i*.

    The coordinates refer to the figure written by ``plt.savefig`` in
    ``noiseReduction``, not to the raw 150x50 captcha.
    NOTE(review): they presumably assume matplotlib's default figure size
    and dpi -- confirm before changing either.
    """
    return (100 * (1 + i), 170, 100 * (1 + i) + 100, 280)


def binarization(path, show=True):
    """Load an image and turn it into a 0/1 array.

    Args:
        path: Path of the image file to read.
        show: When true (the default) the result is displayed with
            ``plt.show()``.  Batch callers pass ``False`` so that processing
            does not stop at every image.

    Returns:
        A 2-D ``uint8`` array in which every pixel with a grey value of 254
        or less is 0 (black) and every other pixel is 1 (white).
    """
    with Image.open(path) as img:
        # convert('L') turns the image into a single-channel grey image.
        gray = np.array(img.convert('L'))
    binary = (gray > 254).astype(np.uint8)
    if show:
        plt.figure('')
        plt.imshow(binary, cmap='gray')
        plt.axis('off')
        plt.show()
    return binary


def noiseReduction(img_gray, label):
    """Remove isolated black pixels and save the result as a figure.

    A black pixel with fewer than four black pixels among its eight
    neighbours is treated as noise and turned white.  ``img_gray`` is
    modified in place while it is scanned, so pixels cleared earlier in the
    scan influence the counts of later ones.  The cleaned array is rendered
    with matplotlib and written to ``clean_captcha_img/<label>.png``.

    Args:
        img_gray: 2-D 0/1 array as returned by ``binarization``.
        label: File name (without extension) for the saved figure.
    """
    height, width = img_gray.shape
    for x in range(height):
        for y in range(width):
            # White pixels are left alone.
            if img_gray[x, y] == 1:
                continue
            cont = 0
            for dx in (-1, 0, 1):
                for dy in (-1, 0, 1):
                    if dx == 0 and dy == 0:
                        continue
                    nx, ny = x + dx, y + dy
                    # Explicit bounds check: a negative index does not raise
                    # in NumPy, it wraps around to the opposite edge, so the
                    # former try/except never caught the top and left
                    # borders.
                    if (0 <= nx < height and 0 <= ny < width
                            and img_gray[nx, ny] == 0):
                        cont += 1
            # Fewer than four black neighbours: treat the pixel as noise.
            if cont < 4:
                img_gray[x, y] = 1

    os.makedirs(CLEAN_DIR, exist_ok=True)
    plt.figure('')
    # The figure labelled '' is reused between calls; clear it so images
    # from earlier captchas do not pile up on the same axes.
    plt.clf()
    plt.imshow(img_gray, cmap='gray')
    plt.axis('off')
    plt.savefig(''.join([CLEAN_DIR, label, '.png']))


def create_dir():
    """Create the ``cut_number/0`` ... ``cut_number/9`` folders.

    Existing folders are kept, so the script can be run more than once.
    """
    for i in range(10):
        os.makedirs(''.join([CUT_DIR, str(i)]), exist_ok=True)


def img_2_clean():
    """Binarize and denoise every PNG found in ``captcha_images/``."""
    for captcha in sorted(os.listdir(RAW_DIR)):
        if not captcha.lower().endswith('.png'):
            continue
        label = captcha.split('.')[0]
        img_path = ''.join([RAW_DIR, captcha])
        im = binarization(img_path, show=False)
        noiseReduction(im, label)


def clean_to_cut():
    """Pass the label of every cleaned captcha to ``cutImg``.

    Only files whose name is exactly ``DIGIT_COUNT`` digits are cut: the
    label decides which digit folder each slice is stored in, so anything
    else (for example ``unknown.png``) cannot be filed.
    """
    for captcha in sorted(os.listdir(CLEAN_DIR)):
        label = captcha.split('.')[0]
        if len(label) == DIGIT_COUNT and label.isdigit():
            cutImg(label)


def cutImg(label):
    """Cut a cleaned captcha into digits and file each under its label.

    Args:
        label: The digits shown in the captcha; also the stem of the file
            read from ``clean_captcha_img/``.
    """
    with Image.open(''.join([CLEAN_DIR, label, '.png'])) as img:
        for i in range(DIGIT_COUNT):
            pic = img.crop(_digit_box(i))
            seq = get_save_seq(label[i])
            pic.save(''.join([CUT_DIR, str(label[i]), '/', str(seq), '.png']))


def get_save_seq(num):
    """Return the next free sequence number inside ``cut_number/<num>/``.

    Args:
        num: The digit, as a string, naming the folder to inspect.

    Returns:
        0 when the folder holds no numbered file, otherwise the highest
        numeric file stem plus one.  Files whose stem is not a number are
        ignored.
    """
    used = []
    for name in os.listdir(''.join([CUT_DIR, num, '/'])):
        stem = name.split('.')[0]
        if stem.isdigit():
            used.append(int(stem))
    return max(used, default=-1) + 1


if __name__ == '__main__':
    create_dir()
    img_2_clean()
    clean_to_cut()

三、將數據帶入邏輯回歸進行建模

【1】把切割好的數據，按照 X（二維數組）、Y（一維數組）的方式傳入 LogisticRegression.fit() 函數進行擬合；（可以通過網格搜索 GridSearch 來進行調參）
【2】通過 joblib 包將創建好的模型保存到本地，之后驗證數據速度很快

四、得到模型，進行圖像驗證

【0】根據步驟一，生成測試圖片

【1】根據步驟二，對新的圖像重複同樣的處理
【2】對切割好的每一個圖像都進行獨立的預測（有點像手寫識別系統識別數字）
【3】把最后預測的結果進行拼接

"""Train a logistic-regression digit classifier and read a CAPTCHA with it."""
import os

import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

try:
    import joblib
except ImportError:
    # Fallback for old scikit-learn releases that only ship the vendored
    # copy; newer releases no longer provide sklearn.externals.joblib.
    from sklearn.externals import joblib

from captcha_logistic import binarization, noiseReduction

MODEL_PATH = 'captcha_model/captcha_model.model'

# Grey values up to this threshold count as black.  Training and prediction
# must use the same value, otherwise the classifier sees features at
# prediction time that differ from the ones it was fitted on (prediction
# used to binarize at 240 while training used 254).
BINARY_THRESHOLD = 254


def _binarize_flat(img):
    """Return *img* as a flat 0/1 ``uint8`` feature vector."""
    gray = np.array(img.convert('L'))
    return (gray > BINARY_THRESHOLD).astype(np.uint8).reshape(-1)


def load_data():
    """Read every digit image below ``cut_number/<digit>/``.

    Entries of ``cut_number`` that are not digit-named directories are
    skipped.

    Returns:
        ``(X, Y)`` where ``X`` is a 2-D array with one flattened 0/1 image
        per row and ``Y`` is the 1-D array of the matching integer labels.
    """
    X, Y = [], []
    for numC in sorted(os.listdir('cut_number')):
        num_list_dir = ''.join(['cut_number/', str(numC), '/'])
        if not (numC.isdigit() and os.path.isdir(num_list_dir)):
            continue
        for num_file in sorted(os.listdir(num_list_dir)):
            with Image.open(''.join([num_list_dir, num_file])) as img:
                X.append(_binarize_flat(img))
            Y.append(int(numC))
    return np.array(X), np.array(Y)


def generate_model(X, Y):
    """Fit a one-vs-rest logistic regression and save it to ``MODEL_PATH``.

    30 % of the samples are held out; the accuracy on them is printed so
    the split is actually used to judge the model.

    Args:
        X: 2-D feature array as returned by ``load_data``.
        Y: 1-D label array as returned by ``load_data``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
    # NOTE(review): recent scikit-learn releases deprecate ``multi_class``;
    # confirm against the installed version before upgrading.
    log_clf = LogisticRegression(multi_class='ovr', solver='sag', max_iter=10000)
    log_clf.fit(X_train, Y_train)
    print('held-out accuracy:', log_clf.score(X_test, Y_test))
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    joblib.dump(log_clf, MODEL_PATH)


def get_model():
    """Load and return the classifier stored at ``MODEL_PATH``."""
    return joblib.load(MODEL_PATH)


def capthca_predict():
    """Predict the digits shown in ``captcha_images/unknown.png``.

    The image goes through the same binarize / denoise / crop steps as the
    training data.  Each cropped digit is also written to
    ``captcha_predict<i>.png`` in the working directory.

    Returns:
        The predicted digits concatenated into one string.
    """
    path = 'captcha_images/unknown.png'
    pre_img_gray = binarization(path)
    noiseReduction(pre_img_gray, 'unknown')

    model = get_model()
    features = []
    # Cut the cleaned test image with the same boxes used for training.
    with Image.open(''.join(['clean_captcha_img/unknown.png'])) as img:
        for i in range(5):
            pic = img.crop((100 * (1 + i), 170, 100 * (1 + i) + 100, 280))
            pic.save(''.join(['captcha_predict', str(i), '.png']))
            features.append(_binarize_flat(pic))

    predictions = model.predict(np.array(features))
    return ''.join(str(digit) for digit in predictions)


if __name__ == '__main__':
    # Train only when no saved model exists yet; afterwards the stored
    # model is reused.
    if not os.path.exists(MODEL_PATH):
        X, Y = load_data()
        generate_model(X, Y)
    result = capthca_predict()
    print(result)