深度学习实战之车牌识别项目
車牌識別項目
不經常上線,既然有人想要代碼,現在我將它發出來,不過需要你們下載,具體下載方法,我想你們都懂得。
https://download.csdn.net/download/weixin_43648821/11423543
本博客記錄的是我學完深度學習之后做的一個小項目,目前基本已完成。環境配置:Win7、Python 3.7、OpenCV 4.10。做本項目,一是為了總結,二是方便以后回顧。
車牌圖像處理原理
一、是讀取圖像,對圖像進行預處理,包括(具有先后順序):壓縮圖像、轉換為灰度圖像、灰度拉伸、開運算(去噪聲)、將灰度圖像和開運算后圖像取差分圖、整張圖像二值化、canny邊緣檢測、閉運算、開運算、再次開運算(這三步是為了保留車牌區域,并消除其他區域)、定位車牌位置(找輪廓、畫輪廓、取前三個輪廓進行排序、找出最大的區域);
二、是框出車牌號;
三、分割車牌號和背景,分割包括:創建掩膜、創建背景和前景、分割;
四、將分割出來的車牌進行二值化,生成黑白圖像;
五、分割出車牌號碼中的文字、數字和字母,放入特定的文件夾;
六、對分割出來的文字、數字和字母圖像尺寸進行處理,以方便后面測試。
測試所用圖像
下面是相關代碼
主函數
def carimg_make(img):
    """Locate the licence plate in ``img`` and split it into character images.

    Steps: preprocess and locate the plate (``preprocessing``), draw the plate
    frame, isolate the plate (``cut_license``), binarize it
    (``lice_binarization``), then scan the columns of the binary plate to find
    where each character starts and ends.  Every character strip is written
    to ``img/<n>.bmp`` and its enlarged, tightly cropped version to
    ``img_test/<n>.bmp``, where ``n`` is the column at which the strip ends.

    Intermediate images are shown in OpenCV windows and the function blocks
    on ``cv2.waitKey(0)`` after the binarization and after every character.

    Args:
        img: BGR image, e.g. the result of ``cv2.imread``.
    """
    # Local imports: the original block relied on neither name.
    import os
    import numpy as np

    # Preprocess the image; this also locates the plate.
    rect, afterimg = preprocessing(img)
    print("rect:", rect)

    # Frame the plate.
    # NOTE(review): the frame is drawn on ``afterimg`` itself, so the green
    # border is part of the image handed to cut_license() below.
    cv2.rectangle(afterimg, (rect[0], rect[1]), (rect[2], rect[3]), (0, 255, 0), 2)
    cv2.imshow('afterimg1', afterimg)

    # Separate the plate from the background.
    cutimg = cut_license(afterimg, rect)
    cv2.imshow('cutimg', cutimg)

    # Binarize into a black/white image.
    thresh = lice_binarization(cutimg)
    cv2.imshow('thresh', thresh)
    cv2.waitKey(0)

    # Per-column totals of white (255) and black (0) pixels.
    height, width = thresh.shape[:2]
    white = np.count_nonzero(thresh == 255, axis=0).tolist()
    black = np.count_nonzero(thresh == 0, axis=0).tolist()
    white_max = max(white, default=0)
    black_max = max(black, default=0)
    print('white_max', white_max)
    print('black_max', black_max)

    # arg is True for white characters on a black background,
    # False for black characters on a white background.
    arg = not (black_max < white_max)

    # A column belongs to a character when its character-coloured pixel count
    # exceeds 5% of the maximum (find_end uses the matching 95% threshold).
    char_counts = white if arg else black
    char_limit = 0.05 * (white_max if arg else black_max)

    # cv2.imwrite fails silently when the target folder does not exist.
    os.makedirs('img', exist_ok=True)
    os.makedirs('img_test', exist_ok=True)

    n = 1
    while n < width - 2:
        n += 1
        if char_counts[n] <= char_limit:
            continue

        start = n
        end = find_end(start, arg, black, white, width, black_max, white_max)
        n = end
        print("start" + str(start))
        print("end" + str(end))

        # Strips of 5 columns or fewer are treated as noise (e.g. the plate
        # border) and skipped.
        if end - start <= 5:
            continue

        cj = thresh[1:height, start:end]
        strip_path = 'img/{0}.bmp'.format(n)
        print(strip_path)  # log the path that is actually written
        cv2.imwrite(strip_path, cj)

        # Enlarge the strip and crop it to its largest contour.
        b_img = cv2.resize(cj, None, fx=5, fy=3)
        # [-2] selects the contour list for both the OpenCV 3 (3 return
        # values) and OpenCV 4 (2 return values) signatures.
        contours = cv2.findContours(
            b_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        best_box, best_area = None, -1
        for c in contours:
            # box is [x_min, y_min, x_max, y_max] of the contour.
            box = find_rectangle(c)
            area = (box[2] - box[0]) * (box[3] - box[1])
            # ">=" keeps the last contour among equal areas, matching the
            # original ``sorted(...)[-1]`` selection.
            if area >= best_area:
                best_box, best_area = box, area

        if best_box is None:
            # Previously this case crashed with an IndexError.
            print("no contour found in strip %s, skipped" % n)
            continue

        x_min, y_min, x_max, y_max = best_box
        cropImg = b_img[y_min:y_max, x_min:x_max]  # rows = y, columns = x
        if cropImg.size == 0:
            print("empty crop for strip %s, skipped" % n)
            continue

        cv2.imwrite('img_test/{0}.bmp'.format(n), cropImg)
        cv2.imshow('cutlicense', cj)
        cv2.imshow("charecter", cropImg)
        cv2.waitKey(0)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Next section in the article: 預處理函數 (preprocessing function)
def preprocessing(img):
    """Preprocess a car image and locate its licence plate.

    The image is resized to a width of 400 pixels (aspect ratio kept),
    converted to grey, contrast-stretched, and opened with a circular kernel.
    The difference between the stretched and the opened image is binarized,
    edge-detected, and cleaned with one closing and two openings so that
    mostly the plate region remains.  ``locate_license`` then picks the plate.

    Intermediate images are shown in OpenCV windows and the function blocks
    on ``cv2.waitKey(0)`` at several stages.

    Args:
        img: BGR image, e.g. the result of ``cv2.imread``.

    Returns:
        ``(rect, img)``: ``rect`` is the value returned by ``locate_license``
        and ``img`` is the resized colour image.

    Raises:
        ValueError: if ``img`` is ``None`` (``cv2.imread`` returns ``None``
            when a file cannot be read).
    """
    if img is None:
        raise ValueError("preprocessing() got None instead of an image; "
                         "check the path passed to cv2.imread")

    # Resize to a fixed width of 400 pixels.
    m = 400 * img.shape[0] / img.shape[1]
    img = cv2.resize(img, (400, int(m)), interpolation=cv2.INTER_CUBIC)

    # BGR -> grey.
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    print('gray_img.shape', gray_img.shape)

    # Grey-level stretching spreads the used grey range to improve contrast
    # for images that are overall too dark or too bright.
    stretchedimg = stretching(gray_img)
    print('stretchedimg.shape', stretchedimg.shape)

    # Opening with a filled circular kernel of radius 15 to remove noise.
    r = 15
    h = w = r * 2 + 1
    kernel = np.zeros((h, w), np.uint8)
    cv2.circle(kernel, (r, r), r, 1, -1)
    openingimg = cv2.morphologyEx(stretchedimg, cv2.MORPH_OPEN, kernel)

    # Difference image between the stretched and the opened image.
    strtimg = cv2.absdiff(stretchedimg, openingimg)
    cv2.imshow("stretchedimg", stretchedimg)
    cv2.imshow("openingimg1", openingimg)
    cv2.imshow("strtimg", strtimg)
    cv2.waitKey(0)

    # Binarize the whole image.
    binaryimg = allbinaryzation(strtimg)
    cv2.imshow("binaryimg", binaryimg)
    cv2.waitKey(0)

    # Canny edge detection.
    # NOTE(review): the two hysteresis thresholds are the image height and
    # width; kept as in the original - confirm this is intended.
    canny = cv2.Canny(binaryimg, binaryimg.shape[0], binaryimg.shape[1])
    cv2.imshow("canny", canny)
    cv2.waitKey(0)

    # Keep the plate region and remove other regions: closing, opening, and
    # a second opening with a different kernel.
    kernel = np.ones((5, 23), np.uint8)
    closingimg = cv2.morphologyEx(canny, cv2.MORPH_CLOSE, kernel)
    cv2.imshow("closingimg", closingimg)

    openingimg = cv2.morphologyEx(closingimg, cv2.MORPH_OPEN, kernel)
    cv2.imshow("openingimg2", openingimg)

    kernel = np.ones((11, 6), np.uint8)
    openingimg = cv2.morphologyEx(openingimg, cv2.MORPH_OPEN, kernel)
    cv2.imshow("openingimg3", openingimg)
    cv2.waitKey(0)

    # Drop small regions and locate the plate.
    rect = locate_license(openingimg, img)
    return rect, img
# Next section in the article: 車牌定位函數 (plate-locating function)
def locate_license(img, afterimg):
    """Pick the contour region that looks most like a licence plate.

    The (up to) three largest contour bounding boxes of ``img`` are examined;
    the one containing the most pixels inside the HSV range
    ``[100, 50, 50]``-``[140, 255, 255]`` wins.  If no candidate contains such
    a pixel, the largest box is returned.

    All contours are also drawn on a copy of ``afterimg`` and shown in a
    window; the function blocks on ``cv2.waitKey(0)``.

    Args:
        img: single-channel image in which contours are searched.
        afterimg: BGR image with the same geometry, used for the colour test.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` of the selected region, as produced
        by ``find_rectangle``.

    Raises:
        ValueError: if ``img`` contains no contour (previously an opaque
            ``IndexError``).
    """
    # [-2] selects the contour list for both the OpenCV 3 (3 return values)
    # and OpenCV 4 (2 return values) signatures.
    contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    img_copy = afterimg.copy()
    img_cont = cv2.drawContours(img_copy, contours, -1, (255, 0, 0), 6)
    cv2.imshow("img_cont", img_cont)
    cv2.waitKey(0)

    # Bounding box and area of every contour.
    block = []
    for c in contours:
        r = find_rectangle(c)  # [x_min, y_min, x_max, y_max]
        a = (r[2] - r[0]) * (r[3] - r[1])
        block.append([r, a])
    if not block:
        raise ValueError("locate_license(): no contour found in the image")

    # Keep the three largest regions (ascending by area).
    block = sorted(block, key=lambda b: b[1])[-3:]

    # Colour test: the candidate with the most in-range pixels wins.
    lower = np.array([100, 50, 50])
    upper = np.array([140, 255, 255])
    maxweight, maxindex = 0, -1
    for i, (r, _area) in enumerate(block):
        region = afterimg[r[1]:r[3], r[0]:r[2]]
        if region.size == 0:
            # A zero-width or zero-height box cannot be colour-converted.
            continue
        hsv = cv2.cvtColor(region, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, lower, upper)
        # The mask holds only 0 and 255, so counting non-zero entries equals
        # the original sum of mask / 255.
        weight = int(np.count_nonzero(mask))
        if weight > maxweight:
            maxindex = i
            maxweight = weight

    # maxindex stays -1 when nothing matched, which selects the largest box.
    return block[maxindex][0]
# Next section in the article: 灰度拉伸函數 (grey-level stretching function)
def stretching(img):
    """Linearly stretch the grey levels of ``img`` to the range 0-255.

    Each pixel ``p`` becomes ``255 * (p - min) / (max - min)``.  The array is
    modified in place and also returned, as in the original implementation.
    For integer images the result is rounded to the nearest level (the
    original truncated, which could map the maximum to 254).

    Args:
        img: NumPy image array.

    Returns:
        The same array object, stretched.  An image whose pixels all share
        one value is returned unchanged, because the stretch is undefined
        (this used to raise ``ZeroDivisionError``).
    """
    import numpy as np  # local import: the original block did not use numpy

    maxi = float(img.max())
    mini = float(img.min())
    if maxi == mini:
        return img

    stretched = (img.astype(np.float64) - mini) * (255.0 / (maxi - mini))
    if np.issubdtype(img.dtype, np.integer):
        stretched = np.rint(stretched)
    # Clip guards against floating-point overshoot before the cast back.
    img[...] = np.clip(stretched, 0, 255).astype(img.dtype)
    return img
# Next section in the article: 整個圖像的二值化函數 (whole-image binarization function)
def allbinaryzation(img):
    """Binarize ``img`` around the midpoint of its grey range.

    The threshold is halfway between the minimum and maximum pixel value;
    pixels above it become 255 and all others 0 (``cv2.THRESH_BINARY``).

    Args:
        img: single-channel image array.

    Returns:
        The binarized black/white image.
    """
    maxi = float(img.max())
    mini = float(img.min())
    x = maxi - ((maxi - mini) / 2)
    # cv2.threshold returns (threshold used, binarized image); only the
    # image is needed.
    _ret, thresh = cv2.threshold(img, x, 255, cv2.THRESH_BINARY)
    return thresh
# Next section in the article: 圖像分割函數 (image segmentation function)
def cut_license(afterimg, rect):
    """Separate the plate from the background with GrabCut.

    Args:
        afterimg: BGR image containing the plate.
        rect: ``[x_min, y_min, x_max, y_max]`` of the plate.  It is no longer
            modified; the original overwrote ``rect[2]`` and ``rect[3]`` with
            the width and height in the caller's list.

    Returns:
        A copy of ``afterimg`` in which every pixel GrabCut labelled as
        background or probable background is set to 0.
    """
    # GrabCut expects (x, y, width, height) as plain ints.
    x_min, y_min, x_max, y_max = (int(v) for v in rect[:4])
    rect_copy = (x_min, y_min, x_max - x_min, y_max - y_min)
    print("rect_copy", rect_copy)

    # Mask that receives the per-pixel labels.
    mask = np.zeros(afterimg.shape[:2], np.uint8)
    # Background / foreground models: single-row float64 arrays of 65 (13*5)
    # elements, as in the original code.
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)

    cv2.grabCut(afterimg, mask, rect_copy, bgdModel, fgdModel, 5,
                cv2.GC_INIT_WITH_RECT)

    # Labels 0 and 2 are background / probable background.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img_show = afterimg * mask2[:, :, np.newaxis]
    return img_show
# Next section in the article: 車牌區域圖像的二值化函數 (plate-region binarization function)
def lice_binarization(licenseimg):
    """Turn the cut-out plate image into a black/white image.

    The plate is converted to grey, smoothed with a 3x3 mean filter to reduce
    noise, and thresholded at 120 (pixels above 120 become 255, others 0).

    Args:
        licenseimg: BGR image of the plate.

    Returns:
        The binarized single-channel image.
    """
    # Plate -> grey image.
    gray_img = cv2.cvtColor(licenseimg, cv2.COLOR_BGR2GRAY)

    # 3x3 mean filter to remove noise.
    kernel = np.ones((3, 3), np.float32) / 9
    gray_img = cv2.filter2D(gray_img, -1, kernel)

    # Fixed-threshold binarization.
    _ret, thresh = cv2.threshold(gray_img, 120, 255, cv2.THRESH_BINARY)
    return thresh
# Next section in the article: 分割圖像 (image splitting)
def find_end(start, arg, black, white, width, black_max, white_max):
    """Return the column at which the character beginning at ``start`` ends.

    Columns ``start + 1`` to ``width - 2`` are scanned for the first one that
    is almost entirely background, i.e. whose background-coloured pixel count
    exceeds 95% of the maximum for that colour.

    Args:
        start: column where the character begins.
        arg: ``True`` for white characters on a black background (background
            counts come from ``black``), ``False`` for the opposite.
        black: per-column counts of black pixels.
        white: per-column counts of white pixels.
        width: number of columns.
        black_max: largest value in ``black``.
        white_max: largest value in ``white``.

    Returns:
        The first background column found, or ``start + 1`` when none exists.
    """
    # Choose the background series and its threshold once, not per column.
    background = black if arg else white
    limit = 0.95 * (black_max if arg else white_max)

    for m in range(start + 1, width - 1):
        if background[m] > limit:
            return m
    return start + 1
# Next section in the article: 矩形輪廓 (rectangular contour)
def find_rectangle(contour):
    """Return the axis-aligned bounding box of a contour.

    Args:
        contour: sequence of points, each wrapped in one extra level so that
            ``p[0][0]`` and ``p[0][1]`` are its two coordinates (the layout
            returned by ``cv2.findContours``).

    Returns:
        ``[min0, min1, max0, max1]``: the minimum of the first and second
        coordinates followed by their maximum.  Callers in this file use the
        first coordinate as the column (x) and the second as the row (y).

    Raises:
        ValueError: if ``contour`` is empty (raised by ``min``).
    """
    # The original named these lists the other way round (y for p[0][0]),
    # which contradicted how callers index the result.
    xs = [p[0][0] for p in contour]
    ys = [p[0][1] for p in contour]
    return [min(xs), min(ys), max(xs), max(ys)]
# Next section in the article: 結果展示 (results)
1、下圖是車牌圖像處理各個過程的輸出:
2、車牌號分割圖像所示
3、對分割后的圖像進行裁剪,結果表明,對于漢字的裁剪效果不理想,原因是分割后的漢字“魯”上下出現分離,導致裁剪時誤判最上面的白色部分面積最大,從而出現失誤,有待改進。
卷積神經網絡進行訓練和測試
一、定義卷積層;
二、定義全連接層;
三、創建模型和權重參數的文件夾;
四、第一次遍歷圖片文件夾,獲取圖片總數;
五、再次遍歷圖片文件夾,生成圖片輸入數據和標簽;
六、構建訓練模型,該模型包括兩個卷積層和一個全連接層,采用Adam梯度下降優化算法;
七、創建圖,進行迭代訓練;
八、通過tensorflow提供的API接口tf.train.Saver()來保存訓練好的模型以及權重參數等;
九、將分割出來的文字、數字和字母圖像,通過訓練模型和權重進行測試。
訓練函數
#!/usr/bin/python3.7
# -*- coding: utf-8 -*-
import sys
import os
import time
import random

from PIL import Image
import numpy as np
import tensorflow as tf

SIZE = 1280
WIDTH = 32
HEIGHT = 40
NUM_CLASSES = 34
iterations = 50

# Folder that stores the model and the weight parameters.
SAVER_DIR = "train-saver_me/digits/"

LETTERS_DIGITS = (
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E",
    "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",
    "W", "X", "Y", "Z")

license_num = ""

time_begin = time.time()

# Input nodes.
x = tf.placeholder(tf.float32, shape=[None, SIZE])
y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])
# NOTE(review): pixels are flattened as ``w + h * width`` (row-major) but
# reshaped here as (WIDTH, HEIGHT); presumably the images are 32 wide and
# 40 high.  Training and inference share this layout, so it is unchanged.
x_image = tf.reshape(x, [-1, WIDTH, HEIGHT, 1])


def conv_layer(inputs, W, b, conv_strides, kernel_size, pool_strides, padding):
    """Convolution + bias + ReLU followed by max pooling.

    Args:
        inputs: input tensor.
        W: convolution filter.
        b: bias added to the convolution output.
        conv_strides: strides of the convolution.
        kernel_size: window size (``ksize``) of the max pooling.
        pool_strides: strides of the max pooling.
        padding: padding mode of the convolution; pooling always uses 'SAME'.

    Returns:
        The pooled tensor.
    """
    L1_conv = tf.nn.conv2d(inputs, W, strides=conv_strides, padding=padding)
    L1_relu = tf.nn.relu(L1_conv + b)
    return tf.nn.max_pool(L1_relu, ksize=kernel_size, strides=pool_strides,
                          padding='SAME')


def full_connect(inputs, W, b):
    """Fully connected layer with ReLU: ``relu(inputs @ W + b)``."""
    return tf.nn.relu(tf.matmul(inputs, W) + b)


def _load_image_set(dir_template, threshold=230):
    """Load all images of one data set as flat 0/1 vectors with labels.

    ``dir_template % i`` names the folder of class ``i`` for ``i`` in
    ``0 .. NUM_CLASSES - 1``.  A pixel becomes 0 when its value is above
    ``threshold`` and 1 otherwise, in row-major order (``w + h * width``).

    Returns:
        ``(images, labels)`` with shapes ``(count, SIZE)`` and
        ``(count, NUM_CLASSES)``; ``labels`` is one-hot.

    Raises:
        ValueError: if an image has more than ``SIZE`` pixels.
    """
    samples = []
    for label in range(NUM_CLASSES):
        for rt, _dirs, files in os.walk(dir_template % label):
            for filename in files:
                # Join with the directory being walked so files in nested
                # folders resolve correctly.
                samples.append((os.path.join(rt, filename), label))

    images = np.zeros((len(samples), SIZE), dtype=np.float32)
    labels = np.zeros((len(samples), NUM_CLASSES), dtype=np.float32)
    for index, (path, label) in enumerate(samples):
        with Image.open(path) as img:  # close the file handle promptly
            # Multi-band images would yield tuples per pixel and 1-bit images
            # booleans; convert both to 8-bit grey.  Other modes are read
            # as-is, matching the original getpixel() comparison.
            if img.mode == '1' or len(img.getbands()) > 1:
                img = img.convert('L')
            pixels = np.asarray(img)
        flat = (pixels <= threshold).ravel()
        if flat.size > SIZE:
            raise ValueError("%s has %d pixels, expected at most %d"
                             % (path, flat.size, SIZE))
        images[index, :flat.size] = flat
        labels[index, label] = 1
    return images, labels


def carnum_train():
    """Train the character classifier and save it under ``SAVER_DIR``.

    Loads the training and validation images, builds a network with two
    convolution layers, one hidden fully connected layer with dropout and an
    output layer, and trains it with Adam for at most ``iterations`` epochs.
    Validation accuracy is evaluated every fifth epoch and training stops
    early once it reaches 99.99%.
    """
    num_iterations = iterations
    time_begin = time.time()

    # i in the folder name is the class label; change the paths to your own
    # image folders if needed.
    input_images, input_labels = _load_image_set('./train_images/training-set/%s/')
    input_count = len(input_images)
    val_images, val_labels = _load_image_set('./train_images/validation-set/%s/')

    with tf.Session() as sess:
        # First convolution layer.
        W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 1, 16], stddev=0.1), name="W_conv1")
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv1")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Second convolution layer.
        W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 16, 32], stddev=0.1), name="W_conv2")
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]), name="b_conv2")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Fully connected layer.
        W_fc1 = tf.Variable(tf.truncated_normal([16 * 20 * 32, 512], stddev=0.1), name="W_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name="b_fc1")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout.
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Output layer.
        W_fc2 = tf.Variable(tf.truncated_normal([512, NUM_CLASSES], stddev=0.1), name="W_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b_fc2")

        # Loss, optimizer and accuracy ops.
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        sess.run(tf.global_variables_initializer())

        time_elapsed = time.time() - time_begin
        print("讀取圖片文件耗費時間:%d秒" % time_elapsed)
        time_begin = time.time()
        print("一共讀取了 %s 個訓練圖像, %s 個標簽" % (input_count, input_count))

        # Batches of 60 samples; the final batch holds the remainder, e.g.
        # 150 images give batches of 60, 60 and 30.
        batch_size = 60
        batches_count = input_count // batch_size
        remainder = input_count % batch_size
        print("訓練數據集分成 %s 批, 前面每批 %s 個數據,最后一批 %s 個數據"
              % (batches_count + 1, batch_size, remainder))

        for it in range(num_iterations):
            # Slicing past the end is clamped, so the final (shorter) batch
            # includes the very last sample; the original dropped it.
            for start in range(0, input_count, batch_size):
                stop = start + batch_size
                train_step.run(feed_dict={x: input_images[start:stop],
                                          y_: input_labels[start:stop],
                                          keep_prob: 0.5})

            # Every fifth epoch: evaluate on the validation set and stop once
            # accuracy is (practically) 100%.  The original additionally
            # required ``it >= iterations``, which is never true inside this
            # loop, so its early stop could not trigger.
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(
                    feed_dict={x: val_images, y_: val_labels, keep_prob: 1.0})
                print('第 %d 次訓練迭代: 準確率 %0.5f%%' % (it, iterate_accuracy * 100))
                if iterate_accuracy >= 0.9999:
                    break

        print('完成訓練!')
        time_elapsed = time.time() - time_begin
        print("訓練耗費時間:%d秒" % time_elapsed)

        # Save the training result.
        if not os.path.exists(SAVER_DIR):
            print('不存在訓練數據保存目錄,現在創建保存目錄')
            os.makedirs(SAVER_DIR)
        # tf.train.Saver saves and restores the network.
        saver = tf.train.Saver()
        saver_path = saver.save(sess, "%smodel.ckpt" % (SAVER_DIR))
# Next section in the article: 測試函數 (test function)
def carnum_test():
    """Recognize the characters in ``test_images/3.bmp`` .. ``7.bmp``.

    Restores the model saved under ``SAVER_DIR``, rebuilds the network from
    the stored weights, classifies each image and prints the three most
    probable characters for it, then prints the assembled plate number.

    NOTE(review): the segmentation code in this article writes its crops to
    ``img_test/<n>.bmp``, not ``test_images/``; presumably the files are
    copied and renamed by hand - confirm.

    Returns:
        The recognized characters as one string (the original returned
        nothing).
    """
    license_num = ""
    saver = tf.train.import_meta_graph("%smodel.ckpt.meta" % (SAVER_DIR))

    with tf.Session() as sess:
        model_file = tf.train.latest_checkpoint(SAVER_DIR)
        saver.restore(sess, model_file)

        # First convolution layer; get_tensor_by_name fetches the variables
        # stored during training by name.
        W_conv1 = sess.graph.get_tensor_by_name("W_conv1:0")
        b_conv1 = sess.graph.get_tensor_by_name("b_conv1:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Second convolution layer.
        W_conv2 = sess.graph.get_tensor_by_name("W_conv2:0")
        b_conv2 = sess.graph.get_tensor_by_name("b_conv2:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Fully connected layer.
        W_fc1 = sess.graph.get_tensor_by_name("W_fc1:0")
        b_fc1 = sess.graph.get_tensor_by_name("b_fc1:0")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # Dropout (disabled at inference through keep_prob = 1.0).
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer.
        W_fc2 = sess.graph.get_tensor_by_name("W_fc2:0")
        b_fc2 = sess.graph.get_tensor_by_name("b_fc2:0")
        conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        for n in range(3, 8):
            path = "test_images/%s.bmp" % (n)
            with Image.open(path) as img:  # close the file handle promptly
                # Multi-band images would yield tuples per pixel and 1-bit
                # images booleans; convert both to 8-bit grey.
                if img.mode == '1' or len(img.getbands()) > 1:
                    img = img.convert('L')
                pixels = np.asarray(img)

            # Pixels darker than 190 become 1, all others 0, flattened in
            # row-major order (w + h * width).
            flat = (pixels < 190).ravel()
            if flat.size > SIZE:
                raise ValueError("%s has %d pixels, expected at most %d"
                                 % (path, flat.size, SIZE))
            img_data = np.zeros((1, SIZE), dtype=np.float32)
            img_data[0, :flat.size] = flat

            result = sess.run(conv, feed_dict={x: img_data, keep_prob: 1.0})

            # Three most probable classes, best first.  The original kept
            # three running maxima but never shifted the previous maximum
            # down, so its second and third guesses could be wrong.  A stable
            # sort keeps the lowest index among equal probabilities.
            probs = result[0]
            max1_index, max2_index, max3_index = (
                int(i) for i in np.argsort(-probs, kind="stable")[:3])
            max1 = probs[max1_index]
            max2 = probs[max2_index]
            max3 = probs[max3_index]

            license_num = license_num + LETTERS_DIGITS[max1_index]
            print("概率: [%s %0.2f%%] [%s %0.2f%%] [%s %0.2f%%]" % (
                LETTERS_DIGITS[max1_index], max1 * 100,
                LETTERS_DIGITS[max2_index], max2 * 100,
                LETTERS_DIGITS[max3_index], max3 * 100))

        print("車牌編號是: 【%s】" % license_num)
    return license_num
# Next section in the article: 測試結果 (test result)
車牌號是: 01672Q
總結
以上是生活随笔為你收集整理的深度学习实战之车牌识别项目的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 使用wget下载GLDAS数据
- 下一篇: 梳理百年深度学习发展史-七月在线机器学习