yolo_model to output理解
生活随笔
收集整理的這篇文章主要介紹了
yolo_model to output理解
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
如果想看yolov3的非極大抑制算法,可以看這里
由神經網絡的特征層到輸出層
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 17 14:48:19 2021

@author: ledi

Walk-through of how a YOLOv3 model goes from backbone feature maps to final
detections.

The module defines the Darknet backbones, the YOLO conv/output sub-model
builders, the box decoding step (``yolo_boxes``) and the non-max-suppression
step (``yolo_nms``).  The module-level code at the bottom is the body of a
``YoloV3(...)`` builder unrolled into plain statements: it pushes one dummy
416x416 image through the network and prints intermediate shapes.
"""

from absl import flags
from absl.flags import FLAGS
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Add,
    Concatenate,
    Conv2D,
    Input,
    Lambda,
    LeakyReLU,
    MaxPool2D,
    UpSampling2D,
    ZeroPadding2D,
    BatchNormalization,
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import (
    binary_crossentropy,
    sparse_categorical_crossentropy,
)
from utils1 import broadcast_iou

# flags.DEFINE_integer('yolo_max_boxes', 100,
#                      'maximum number of boxes per image')
# flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold')
# flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold')

# Anchor sizes are given in pixels and divided by 416 below.
yolo_anchors = np.array(
    [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
     (59, 119), (116, 90), (156, 198), (373, 326)],
    np.float32) / 416
# Row i lists the anchor indices used by output head i.
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])

yolo_tiny_anchors = np.array(
    [(10, 14), (23, 27), (37, 58),
     (81, 82), (135, 169), (344, 319)],
    np.float32) / 416
yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])


def DarknetConv(x, filters, size, strides=1, batch_norm=True):
    """Apply Conv2D, optionally followed by BatchNorm + LeakyReLU(0.1).

    With ``strides == 1`` the convolution uses 'same' padding.  Otherwise the
    input is zero-padded by one pixel on the top and left and the convolution
    uses 'valid' padding.  The conv bias is disabled when ``batch_norm`` is
    true.

    NOTE(review): the nesting of LeakyReLU under ``if batch_norm`` was
    reconstructed from whitespace-collapsed text; it follows the usual
    YOLOv3-TF2 layout (no activation on the final head conv) - confirm.
    """
    if strides == 1:
        padding = 'same'
    else:
        x = ZeroPadding2D(((1, 0), (1, 0)))(x)  # top left half-padding
        padding = 'valid'
    x = Conv2D(filters=filters, kernel_size=size,
               strides=strides, padding=padding,
               use_bias=not batch_norm, kernel_regularizer=l2(0.0005))(x)
    if batch_norm:
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.1)(x)
    return x


def DarknetResidual(x, filters):
    """Residual unit: 1x1 conv (filters // 2) -> 3x3 conv (filters) + input."""
    prev = x
    x = DarknetConv(x, filters // 2, 1)
    x = DarknetConv(x, filters, 3)
    x = Add()([prev, x])
    return x


def DarknetBlock(x, filters, blocks):
    """Stride-2 3x3 conv followed by ``blocks`` residual units."""
    x = DarknetConv(x, filters, 3, strides=2)
    for _ in range(blocks):
        x = DarknetResidual(x, filters)
    return x


def Darknet(name=None):
    """Build the Darknet backbone as a Keras model.

    Returns a model mapping an ``[None, None, 3]`` image to the tuple
    ``(x_36, x_61, x)``: the outputs of the 256-, 512- and 1024-filter blocks.
    """
    x = inputs = Input([None, None, 3])
    x = DarknetConv(x, 32, 3)
    x = DarknetBlock(x, 64, 1)
    x = DarknetBlock(x, 128, 2)  # skip connection
    x = x_36 = DarknetBlock(x, 256, 8)  # skip connection
    x = x_61 = DarknetBlock(x, 512, 8)
    x = DarknetBlock(x, 1024, 4)
    return tf.keras.Model(inputs, (x_36, x_61, x), name=name)


def DarknetTiny(name=None):
    """Build the tiny backbone; the model outputs the tuple ``(x_8, x)``."""
    x = inputs = Input([None, None, 3])
    x = DarknetConv(x, 16, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 32, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 64, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 128, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = x_8 = DarknetConv(x, 256, 3)  # skip connection
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 512, 3)
    x = MaxPool2D(2, 1, 'same')(x)
    x = DarknetConv(x, 1024, 3)
    return tf.keras.Model(inputs, (x_8, x), name=name)


def YoloConv(filters, name=None):
    """Return a closure that builds and applies a YOLO conv sub-model.

    The outer function only fixes ``filters`` and ``name`` (for example
    ``filters=512, name='yolo_conv_0'``); the returned ``yolo_conv`` does the
    actual work.  When ``x_in`` is a tuple ``(x, x_skip)``, ``x`` goes through
    a 1x1 conv, is upsampled by 2 and concatenated with ``x_skip`` before the
    five alternating 1x1 / 3x3 convolutions.
    """
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            print('-'*30+'>', 'is_tuple')
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            x, x_skip = inputs

            # concat with skip connection
            x = DarknetConv(x, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, x_skip])
        else:
            print('>'*30+'?', 'not_tuple')
            x = inputs = Input(x_in.shape[1:])

        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        print(x)
        return Model(inputs, x, name=name)(x_in)
    return yolo_conv


def YoloConvTiny(filters, name=None):
    """Tiny variant of ``YoloConv``.

    A tuple input is merged with its skip connection (1x1 conv, upsample,
    concatenate); a single tensor input gets one 1x1 conv.
    """
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            x, x_skip = inputs

            # concat with skip connection
            x = DarknetConv(x, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, x_skip])
        else:
            x = inputs = Input(x_in.shape[1:])
            x = DarknetConv(x, filters, 1)
        return Model(inputs, x, name=name)(x_in)
    return yolo_conv


def YoloOutput(filters, anchors, classes, name=None):
    """Return a closure that builds and applies one YOLO output head.

    ``anchors`` is the number of anchors for this head (an int).  The head
    output is reshaped to ``(-1, height, width, anchors, classes + 5)``.
    """
    def yolo_output(x_in):
        x = inputs = Input(x_in.shape[1:])
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False)
        x = Lambda(lambda x: tf.reshape(
            x, (-1, tf.shape(x)[1], tf.shape(x)[2],
                anchors, classes + 5)))(x)
        return tf.keras.Model(inputs, x, name=name)(x_in)
    return yolo_output


# As tensorflow lite doesn't support tf.size used in tf.meshgrid,
# we reimplemented a simple meshgrid function that use basic tf function.
def _meshgrid(n_a, n_b):
    """Return two ``(n_b, n_a)`` index grids.

    The first grid holds ``0..n_a-1`` along each row; the second holds the
    row index ``0..n_b-1`` repeated across each row.
    """
    return [
        tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)),
        tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a))
    ]


def yolo_boxes(pred, anchors, classes):
    """Decode one raw head output into boxes, objectness and class scores.

    Args:
        pred: head output, e.g. ``output_0``, laid out as
            (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes)).
        anchors: anchor sizes for this head, e.g. ``anchors[masks[0]]``.
        classes: number of classes (size of the last split).

    Returns:
        ``(bbox, objectness, class_probs, pred_box)`` where ``bbox`` is
        ``(x1, y1, x2, y2)`` and ``pred_box`` is the sigmoid-ed xy
        concatenated with the raw wh (kept for the loss).
    """
    grid_size = tf.shape(pred)[1:3]
    print('grid_size=', grid_size)

    # Split the last axis (85 values when classes == 80) into
    # 2 + 2 + 1 + classes.
    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1)

    box_xy = tf.sigmoid(box_xy)
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    pred_box = tf.concat((box_xy, box_wh), axis=-1)  # original xywh for loss

    # !!! grid[x][y] == (y, x)
    grid = _meshgrid(grid_size[1], grid_size[0])
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)  # [gx, gy, 1, 2]

    # https://www.cnblogs.com/wangxinzhe/p/10648465.html
    # https://www.shuzhiduo.com/A/qVdeERkndP/
    # Add the cell offset to the in-cell xy, then divide by the grid size.
    box_xy = (tf.cast(box_xy, tf.float32) + tf.cast(grid, tf.float32)) / \
        tf.cast(grid_size, tf.float32)
    box_wh = tf.exp(box_wh) * anchors

    box_x1y1 = box_xy - box_wh / 2
    box_x2y2 = box_xy + box_wh / 2
    bbox = tf.concat([box_x1y1, box_x2y2], axis=-1)

    return bbox, objectness, class_probs, pred_box


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# FLAGS.yolo_max_boxes = 100
# FLAGS.yolo_iou_threshold 0.5
# FLAGS.yolo_score_threshold 0.5
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# These values are the defaults of the keyword arguments of yolo_nms below.


def yolo_nms(outputs, anchors, masks, classes,
             max_boxes=100, iou_threshold=0.5, score_threshold=0.5,
             soft_nms_sigma=0.5):
    """Merge the per-scale predictions and run non-max suppression.

    Only a batch of exactly one image is supported: the scores are squeezed
    on axis 0 and the boxes are flattened to ``(-1, 4)``.

    Args:
        outputs: iterable of ``(bbox, objectness, class_probs)`` triples, one
            per output scale, e.g.
            ``(boxes_0[:3], boxes_1[:3], boxes_2[:3])``.
        anchors: unused; kept so existing callers keep working.
        masks: unused; kept so existing callers keep working.
        classes: unused; kept so existing callers keep working.
        max_boxes: maximum number of boxes kept; results are zero-padded to
            this length.
        iou_threshold: IoU threshold passed to the NMS op.
        score_threshold: minimum score passed to the NMS op.
        soft_nms_sigma: Soft-NMS sigma passed to the NMS op (per the
            TensorFlow documentation, 0.0 selects standard hard NMS).

    Returns:
        ``(boxes, scores, classes, valid_detections)``, each with a leading
        axis of size 1; the first three are padded to ``max_boxes`` entries.
    """
    # b: box corner coordinates (4 values)
    # c: objectness / confidence (1 value)
    # t: per-class scores (80 values in this example)
    b, c, t = [], [], []

    for o in outputs:
        # Flatten (grid, grid, anchors) into one axis per image.
        b.append(tf.reshape(
            o[0], (tf.shape(o[0])[0], -1, tf.shape(o[0])[-1])))
        print(o[0].shape, o[1].shape, o[2].shape)
        c.append(tf.reshape(
            o[1], (tf.shape(o[1])[0], -1, tf.shape(o[1])[-1])))
        t.append(tf.reshape(
            o[2], (tf.shape(o[2])[0], -1, tf.shape(o[2])[-1])))

    # Shapes printed by the loop for a 416x416 input:
    # (1, 13, 13, 3, 4) (1, 13, 13, 3, 1) (1, 13, 13, 3, 80)
    # (1, 26, 26, 3, 4) (1, 26, 26, 3, 1) (1, 26, 26, 3, 80)
    # (1, 52, 52, 3, 4) (1, 52, 52, 3, 1) (1, 52, 52, 3, 80)

    bbox = tf.concat(b, axis=1)
    print(' bbox.shape =', bbox.shape)  # (1, 10647, 4)
    confidence = tf.concat(c, axis=1)
    print('confidence.shape=', confidence.shape)  # (1, 10647, 1)
    class_probs = tf.concat(t, axis=1)
    print('class_probs.shape=', class_probs.shape)  # (1, 10647, 80)
    # Here 10647 = (13*13 + 26*26 + 52*52) * 3.
    # The author's notes also record this output (unexplained in the source):
    # bbox.shape = (1, 21294, 4)
    # confidence.shape= (1, 21294, 1)
    # class_probs.shape= (1, 21294, 80)

    # Broadcast multiply: scores.shape = [1, 10647, 80]
    scores = confidence * class_probs
    print('scores.shape=', scores.shape)

    dscores = tf.squeeze(scores, axis=0)
    # dscores.shape = (10647, 80)
    print('dscores.shape=', dscores.shape)

    # Best class score of each of the 10647 candidate rows.
    scores = tf.reduce_max(dscores, [1])

    # bbox.shape is now (10647, 4)
    bbox = tf.reshape(bbox, (-1, 4))
    print('bbox.shape=', bbox.shape)

    # Index of the best-scoring class in each row.  Named class_ids so the
    # `classes` argument is no longer rebound.
    class_ids = tf.argmax(dscores, 1)
    print('classes')

    # Non-max suppression over all candidate boxes.
    selected_indices, selected_scores = \
        tf.image.non_max_suppression_with_scores(
            boxes=bbox,                       # all candidate boxes
            scores=scores,                    # one score per box
            max_output_size=max_boxes,        # keep at most this many boxes
            iou_threshold=iou_threshold,      # overlap threshold
            score_threshold=score_threshold,  # minimum score
            soft_nms_sigma=soft_nms_sigma,
        )

    num_valid_nms_boxes = tf.shape(selected_indices)[0]

    # Zero-pad indices and scores up to max_boxes entries.
    selected_indices = tf.concat(
        [selected_indices,
         tf.zeros(max_boxes - num_valid_nms_boxes, tf.int32)], 0)
    selected_scores = tf.concat(
        [selected_scores,
         tf.zeros(max_boxes - num_valid_nms_boxes, tf.float32)], -1)

    # tf.gather picks rows by index, like indexing a list with a list.
    boxes = tf.gather(bbox, selected_indices)
    print('boxes.shape=', boxes.shape)
    boxes = tf.expand_dims(boxes, axis=0)
    print('boxes.shape=', boxes.shape)

    scores = selected_scores
    print('scores.shape=', scores.shape)
    scores = tf.expand_dims(scores, axis=0)
    print('scores.shape=', scores.shape)

    class_ids = tf.gather(class_ids, selected_indices)
    print('classes.shape=', class_ids.shape)
    class_ids = tf.expand_dims(class_ids, axis=0)
    print('classes.shape=', class_ids.shape)

    valid_detections = num_valid_nms_boxes
    valid_detections = tf.expand_dims(valid_detections, axis=0)

    return boxes, scores, class_ids, valid_detections


# ---------------------------------------------------------------------------
# Body of YoloV3, unrolled into module-level statements for inspection.
# ---------------------------------------------------------------------------
# def YoloV3(size=None, channels=3, anchors=yolo_anchors,
#            masks=yolo_anchor_masks, classes=80, training=False):
size = None
channels = 3
anchors = yolo_anchors
masks = yolo_anchor_masks
classes = 80
training = False

# x = inputs = Input([size, size, channels], name='input')

# Features are extracted from the input x here:
#   x_36.shape = TensorShape([1, 52, 52, 256])
#   x_61.shape = TensorShape([1, 26, 26, 512])
#   x.shape    = TensorShape([1, 13, 13, 1024])
# This is the multi-scale part of YOLOv3: features from several depths of the
# network (x_36, x_61, x) are used at the same time.

# Fabricate one dummy image.  The original expression ended in
# `/1*416*416*3`, which divides by 1 and then multiplies; dividing by the
# element count keeps the values in [0, 1).
_n_values = 1 * 416 * 416 * 3
x = inputs = (
    np.arange(_n_values, dtype=np.float32).reshape(1, 416, 416, 3)
    / _n_values
)

x_36, x_61, x = Darknet(name='yolo_darknet')(x)

# NOTE(review): `inputs` is rebound to the yolo_conv_0 output here, as in the
# original text; it is not read again below.
x = inputs = YoloConv(512, name='yolo_conv_0')(x)

# Output from the x features only.
output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x)

# Output that also fuses x_61.
x = YoloConv(256, name='yolo_conv_1')((x, x_61))
output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x)

# Output that also fuses x_36 (so x, x_61 and x_36 all contribute).
x = YoloConv(128, name='yolo_conv_2')((x, x_36))
output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x)

# output_0, output_1 and output_2 all have shape
# (None, None, None, 3, 80 + 5).

# if training:
#     return Model(inputs, (output_0, output_1, output_2), name='yolov3')

# Largest boxes.
boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
                 name='yolo_boxes_0')(output_0)
# Medium boxes.
boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
                 name='yolo_boxes_1')(output_1)
# Small boxes.
boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], classes),
                 name='yolo_boxes_2')(output_2)

outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
                 name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))

# Summary (article section heading that followed the code)
以上是生活随笔為你收集整理的yolo_model to output理解的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 目标检测 nms非极大抑制算法
- 下一篇: yolo loss 将图像标注的真实事坐