yolov3 数据预处理
生活随笔
收集整理的這篇文章主要介紹了
yolov3 数据预处理
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
代碼在 github
import pickle

import numpy as np
import tensorflow as tf
from absl.flags import FLAGS


@tf.function
def transform_targets_for_output(y_true, grid_size, anchor_idxs):
    """Scatter ground-truth boxes into the dense target grid of one scale.

    Only boxes whose best anchor id (``y_true[..., 5]``) appears in
    ``anchor_idxs`` are written; every other cell stays zero.

    Args:
        y_true: labels indexed as ``y_true[sample][box]`` with the last axis
            laid out as ``(x1, y1, x2, y2, class, best_anchor)``.
            Coordinates are presumably normalised to [0, 1] (the grid cell is
            computed with ``1 / grid_size``) -- confirm against the caller.
        grid_size: number of grid cells per side for this scale.
        anchor_idxs: the anchor ids that belong to this scale.

    Returns:
        A float tensor of shape
        ``(N, grid_size, grid_size, len(anchor_idxs), 6)`` whose last axis is
        ``[x1, y1, x2, y2, obj, class]``.
    """
    # N is the number of samples in the batch.
    N = tf.shape(y_true)[0]

    # One slot per (sample, grid row, grid column, anchor of this scale).
    y_true_out = tf.zeros(
        (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6))

    anchor_idxs = tf.cast(anchor_idxs, tf.int32)

    # Dynamically sized arrays collecting where to write and what to write.
    indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True)
    updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)
    idx = 0

    # Outer loop: samples. Inner loop: label slots of one sample.
    for i in tf.range(N):
        for j in tf.range(tf.shape(y_true)[1]):
            # Box layout: (x1, y1) is one corner and (x2, y2) the opposite
            # corner. A slot whose x2 is 0 holds no label, so it is skipped.
            if tf.equal(y_true[i][j][2], 0):
                continue

            # Element-wise: which anchor of this scale (if any) is the best
            # anchor recorded for this box?
            anchor_eq = tf.equal(
                anchor_idxs, tf.cast(y_true[i][j][5], tf.int32))
            # NOTE(review): leftover debug output, kept to preserve behaviour.
            # Under @tf.function a Python print is expected to run only while
            # the function is being traced -- confirm before relying on it.
            print(anchor_eq)
            print('-'*30+'>')

            # Only boxes assigned to one of this scale's anchors are encoded.
            if tf.reduce_any(anchor_eq):
                box = y_true[i][j][0:4]
                # Centre point of the box.
                box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2

                # Position of the matching anchor inside anchor_idxs, i.e.
                # the anchor id re-encoded relative to this scale.
                anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32)
                # Grid cell that contains the box centre.
                grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32)

                # Target index is [sample, grid y, grid x, anchor slot].
                indexes = indexes.write(
                    idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]])
                # Stored value is the box corners, a constant 1 in the "obj"
                # position, and the class id.
                updates = updates.write(
                    idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]])
                idx += 1

    # Every grid cell gets storage for each anchor of this scale, because any
    # cell may contain a box centre; unassigned cells remain all zeros.
    return tf.tensor_scatter_nd_update(
        y_true_out, indexes.stack(), updates.stack())


def transform_targets(y_train, anchors, anchor_masks, size):
    """Assign each box its best anchor and build one target grid per scale.

    Args:
        y_train: labels whose last axis starts with ``x1, y1, x2, y2``; the
            driver script documents it as ``(samples, 100, 5)`` with
            ``[x1, y1, x2, y2, class]``.
        anchors: anchor (width, height) pairs, in the same units as the box
            coordinates.
        anchor_masks: iterable of anchor-id groups, one group per scale.
        size: input image size in pixels; the first grid is ``size // 32``
            cells per side and doubles for every following group.

    Returns:
        A tuple with one ``transform_targets_for_output`` result per entry of
        ``anchor_masks``.
    """
    y_outs = []
    # Coarsest grid: one cell per 32 pixels (13 when size is 416).
    grid_size = size // 32

    # Anchors come from clustering; columns are width and height.
    anchors = tf.cast(anchors, tf.float32)
    anchor_area = anchors[..., 0] * anchors[..., 1]

    # Width and height of every box: (x2, y2) - (x1, y1).
    box_wh = y_train[..., 2:4] - y_train[..., 0:2]
    # Insert an anchor axis, then repeat each box once per anchor so it can
    # be compared against all of them.
    # Example from a run with 9 anchors: box_wh[0][0] becomes the row
    # (0.55466664, 0.32999998) repeated 9 times.
    box_wh_expand = tf.expand_dims(box_wh, -2)
    box_wh = tf.tile(box_wh_expand, (1, 1, tf.shape(anchors)[0], 1))
    # Same run: box_area[0][0] is 0.18303998 repeated 9 times.
    box_area = box_wh[..., 0] * box_wh[..., 1]

    # Overlap computed from widths and heights only: the smaller width times
    # the smaller height. The per-anchor vectors broadcast against the
    # per-box tensor.
    intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * \
        tf.minimum(box_wh[..., 1], anchors[..., 1])
    # Intersection over union.
    iou = intersection / (box_area + anchor_area - intersection)

    # Id of the anchor that matches each labelled box best, appended as an
    # extra trailing column: [x1, y1, x2, y2, class, anchor_idx].
    anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32)
    anchor_idx = tf.expand_dims(anchor_idx, axis=-1)
    y_train = tf.concat([y_train, anchor_idx], axis=-1)

    # Each following anchor group uses a grid twice as fine as the last.
    for anchor_idxs in anchor_masks:
        y_outs.append(transform_targets_for_output(
            y_train, grid_size, anchor_idxs))
        grid_size *= 2

    return tuple(y_outs)


# The fixture was previously written with (kept for reference):
#   data_output = open('data.pkl','wb')
#   pickle.dump(kk1,data_output)
#   data_output.close()
#
# Read the pickled labels in binary mode. The context manager closes the file
# even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('data.pkl', 'rb') as data_input:
    y_train = pickle.load(data_input)

size = 416
# Anchor sizes in pixels, scaled by 416.
anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                    (59, 119), (116, 90), (156, 198), (373, 326)],
                   np.float32) / 416
anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])

# y_train.shape == (6, 100, 5): 6 samples, 100 label slots per sample,
# and 5 values per label: [x1, y1, x2, y2, class].
cc = transform_targets(y_train, anchors, anchor_masks, size)
# Each written update row is the box coordinates, a constant 1 as placeholder
# and the class id, which is why the last dimension of every output is 6.
倒數第二個維度為 3 = len(anchor_masks[k]),k = 0, 1, 2;最後一個維度的 6 個值對應寫入的更新向量:
[box[0], box[1], box[2], box[3], 1, y_true[i][j][4]]
cc[0].shape
Out[248]: TensorShape([6, 13, 13, 3, 6])
cc[1].shape
Out[249]: TensorShape([6, 26, 26, 3, 6])
cc[2].shape
Out[250]: TensorShape([6, 52, 52, 3, 6])
總結
以上是生活随笔為你收集整理的yolov3 数据预处理的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Python @函数装饰器用法
- 下一篇: 在 tensorflow 和numpy