u版本的yolo3代码是真的复杂。
loss.py详细的代码注释如下:
# Loss functions

import torch
import torch.nn as nn

from utils.general import bbox_iou
from utils.torch_utils import is_parallel


def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    """Return the (positive, negative) label-smoothed BCE targets for smoothing factor ``eps``."""
    return 1.0 - 0.5 * eps, 0.5 * eps


class BCEBlurWithLogitsLoss(nn.Module):
    """BCEWithLogitsLoss with reduced missing-label effects."""

    def __init__(self, alpha=0.05):
        super(BCEBlurWithLogitsLoss, self).__init__()
        self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # must be nn.BCEWithLogitsLoss()
        self.alpha = alpha

    def forward(self, pred, true):
        """Return the mean BCE loss, down-weighted where ``sigmoid(pred) - true`` approaches 1."""
        loss = self.loss_fcn(pred, true)
        pred = torch.sigmoid(pred)  # prob from logits
        dx = pred - true  # reduce only missing label effects
        # (pred - true).abs() would reduce both missing-label and false-label effects
        alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
        loss *= alpha_factor
        return loss.mean()


class FocalLoss(nn.Module):
    """Wrap focal loss around an existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super(FocalLoss, self).__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction  # remember the caller's reduction before overriding it
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the focal-weighted loss, reduced according to the wrapped loss's original reduction."""
        loss = self.loss_fcn(pred, true)

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        pred_prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        loss *= alpha_factor * modulating_factor

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:  # 'none'
            return loss


class QFocalLoss(nn.Module):
    """Wrap quality focal loss around an existing loss_fcn(), i.e. criteria = QFocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super(QFocalLoss, self).__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction  # remember the caller's reduction before overriding it
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the quality-focal-weighted loss, reduced according to the wrapped loss's original reduction."""
        loss = self.loss_fcn(pred, true)

        pred_prob = torch.sigmoid(pred)  # prob from logits
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = torch.abs(true - pred_prob) ** self.gamma
        loss *= alpha_factor * modulating_factor

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:  # 'none'
            return loss


class ComputeLoss:
    """Compute the box, objectness and classification losses for the detection head."""

    def __init__(self, model, autobalance=False):
        super(ComputeLoss, self).__init__()
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters
        # Example hyperparameters (from the original annotations):
        # {'lr0': 0.01, 'lrf': 0.2, 'momentum': 0.937, 'weight_decay': 0.0005, 'warmup_epochs': 3.0,
        #  'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1, 'box': 0.05, 'cls': 0.5, 'cls_pw': 1.0, 'obj': 1.0,
        #  'obj_pw': 1.0, 'iou_t': 0.2, 'anchor_t': 4.0, 'fl_gamma': 0.0, 'hsv_h': 0.015, 'hsv_s': 0.7,
        #  'hsv_v': 0.4, 'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0, 'perspective': 0.0,
        #  'flipud': 0.0, 'fliplr': 0.5, 'mosaic': 1.0, 'mixup': 0.0, 'label_smoothing': 0.0}

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        # With label_smoothing == 0.0 this yields cp = 1.0 and cn = 0.0.
        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets

        # Focal loss (disabled when fl_gamma == 0)
        g = h['fl_gamma']  # focal loss gamma
        if g > 0:
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

        det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
        self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index (0 when autobalance is off)
        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance
        for k in 'na', 'nc', 'nl', 'anchors':
            setattr(self, k, getattr(det, k))
        # Example values (from the original annotations): na = 3, nc = 80, nl = 3 and
        # anchors = tensor([[[ 1.25000,  1.62500], [ 2.00000,  3.75000], [ 4.12500,  2.87500]],
        #                   [[ 1.87500,  3.81250], [ 3.87500,  2.81250], [ 3.68750,  7.43750]],
        #                   [[ 3.62500,  2.81250], [ 4.87500,  6.18750], [11.65625, 10.18750]]], device='cuda:0')
        #
        # NOTE: these anchor values are already expressed in feature-map units. According to the original
        # annotations the Model class divides the pixel-space anchors by each layer's stride:
        #     m = self.model[-1]  # Detect()
        #     if isinstance(m, Detect):
        #         s = 256  # 2x min stride
        #         m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])
        #         m.anchors /= m.stride.view(-1, 1, 1)  # strides [8, 16, 32] viewed as [3, 1, 1]
        #         check_anchor_order(m)
        #         self.stride = m.stride
        #         self._initialize_biases()  # only run once
        # The pixel-space anchors were [[10, 13], [16, 30], [33, 23]], [[30, 61], [62, 45], [59, 119]] and
        # [[116, 90], [156, 198], [373, 326]]. There are 3 feature maps, each with its own anchor group and
        # stride; the anchors were defined relative to the input image (small / medium / large objects), so
        # they must be rescaled to the resolution of the feature map they are used on.

    def __call__(self, p, targets):  # predictions, targets
        """Compute the total loss and its detached components.

        :param p: list of per-layer predictions, e.g. [4,3,80,80,85], [4,3,40,40,85], [4,3,20,20,85]
        :param targets: ground-truth rows, e.g. [95,6], laid out as (image, class, x, y, w, h)
        :return: (loss * batch_size, detached tensor of (lbox, lobj, lcls, loss))
        """
        device = targets.device
        lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
        # build_targets binds every ground-truth box to the anchors of each of the 3 feature-map levels.
        # All returned values are lists with one entry per level (example sizes from the original annotations):
        #   tcls    - class of each matched target, shapes [95], [84], [90]
        #   tbox    - (x offset, y offset, w, h) of each matched target, shapes [95,4], [84,4], [90,4]
        #   indices - tuples (image, anchor, gridy, gridx), each tensor of shape [95], [84], [90]
        #   anchors - the anchor (w, h) assigned to each matched target, shapes [95,2], [84,2], [90,2]
        tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets

        # Losses
        for i, pi in enumerate(p):  # layer index, layer predictions (e.g. pi is [4,3,80,80,85])
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj, e.g. [4,3,80,80]

            n = b.shape[0]  # number of targets
            if n:
                # pi holds the raw network outputs while b, a, gj, gi come from the ground truth, so this
                # picks exactly the predictions that are responsible for a target (e.g. ps is [95,85]).
                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

                # Regression
                # sigmoid * 2 - 0.5 maps the xy prediction to (-0.5, 1.5): an offset relative to cell (gi, gj).
                pxy = ps[:, :2].sigmoid() * 2. - 0.5  # e.g. [95,2]
                # (sigmoid * 2) ** 2 maps the wh prediction to (0, 4) times the anchor size; matched targets
                # are within a factor of anchor_t (4) of their anchor, so this range is sufficient.
                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]  # e.g. [95,2]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box, e.g. [95,4]
                # Both pbox and tbox use cell-relative centres rather than absolute grid coordinates. That is
                # fine for IoU: prediction and target share the same cell (gi, gj), so their relative position
                # is unchanged and IoU does not depend on where the pair of boxes is placed.
                iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target), e.g. [95]
                lbox += (1.0 - iou).mean()  # CIoU loss reduced to a single value

                # Objectness
                # tobj[b, a, gj, gi] addresses the same n positions as ps, so each matched position receives
                # its (detached, non-negative) IoU as the objectness target; every other position stays 0.
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)  # iou ratio

                # Classification
                if self.nc > 1:  # cls loss (only if multiple classes)
                    # ps[:, 5:] are the per-class scores (e.g. [95,80]); start from the negative target cn.
                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
                    # Set the column of each target's class to cp: a (smoothed) one-hot ground truth.
                    t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE

            # The last dimension is laid out as x, y, w, h, objectness, class_0, ..., class_{nc-1},
            # so index 4 is the objectness logit (pi[..., 4] is e.g. [4,3,80,80]).
            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss, weighted by the per-layer balance scalar
            if self.autobalance:
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        lbox *= self.hyp['box']
        lobj *= self.hyp['obj']
        lcls *= self.hyp['cls']
        bs = tobj.shape[0]  # batch size

        loss = lbox + lobj + lcls
        return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()

    def build_targets(self, p, targets):
        """Build the per-layer training targets used by __call__.

        :param p: list of per-layer predictions, e.g. [4,3,80,80,85], [4,3,40,40,85], [4,3,20,20,85]
        :param targets: ground-truth rows, e.g. [31,6], laid out as (image, class, x, y, w, h)
        :return: (tcls, tbox, indices, anch), each a list with one entry per detection layer

        Summary: every ground-truth box is scaled onto each feature map and compared with each of that
        layer's anchors. A (target, anchor) pair is kept when max(r, 1 / r) of the width and height ratios
        is below hyp['anchor_t'] (4.0 in the example hyperparameters). The integer part of the scaled centre
        selects the responsible grid cell and the fractional part becomes the regression target. Each kept
        row carries (image, class, x, y, w, h, anchor index), so all four returned lists have the same
        number of rows per layer.
        """
        na, nt = self.na, targets.shape[0]  # number of anchors, number of targets (e.g. 3 and 31)
        tcls, tbox, indices, anch = [], [], [], []
        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # e.g. [3,31]
        # Replicate the targets once per anchor and append the anchor index (0, 1, 2) as a 7th column, so
        # every target can be divided by every anchor's size and still remember which anchor it was paired
        # with after the boolean filtering below flattens the anchor dimension away.
        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices, e.g. [3,31,7]

        g = 0.5  # bias
        # Only the centre-cell offset is used here; no neighbouring-cell offsets are enabled.
        off = torch.tensor([[0, 0]], device=targets.device).float() * g  # offsets

        for i in range(self.nl):
            anchors = self.anchors[i]  # one anchor group, e.g. [3,2]
            shape = p[i].shape  # e.g. [4,3,40,40,85]
            # Positions 2..5 (x, y, w, h) take the feature-map size so normalized targets become grid units.
            gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xyxy gain

            # Match targets to anchors
            t = targets * gain  # targets in the units of the current feature map, e.g. [3,72,7]
            if nt:
                # Matches: divide every target's (w, h) by every anchor's (w, h).
                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio, e.g. [3,72,2]
                # Keep pairs whose worst-case ratio (over w and h, in either direction) is below anchor_t.
                j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # boolean mask, e.g. [3,72]
                # Masking flattens [3,72,7] to e.g. [95,7]; the anchor index survives in column 6, and a
                # target that matches several anchors is kept once per matching anchor.
                t = t[j]  # filter

                # Offsets: off has a single (zero) row, so every filtered target is selected exactly once
                # and the resulting offsets are all zero.
                gxy = t[:, 2:4]  # grid xy, e.g. [95,2]
                j = torch.ones((1, gxy.shape[0]), dtype=torch.bool, device=gxy.device)
                t = t.repeat((off.shape[0], 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            # Define
            b, c = t[:, :2].long().T  # image, class
            gxy = t[:, 2:4]  # grid xy (float, includes the fractional part)
            gwh = t[:, 4:6]  # grid wh, in feature-map units
            gij = (gxy - offsets).long()  # integer cell coordinates
            gi, gj = gij.T  # grid xy indices

            # Append
            a = t[:, 6].long()  # anchor indices
            # Clamp with integer bounds taken from the prediction shape: gi/gj are integer tensors, and
            # float bounds (gain[...] - 1) raise a Float-to-Long cast error on recent PyTorch releases.
            indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))  # image, anchor, grid
            # gxy - gij is the fractional offset of the centre inside its responsible cell.
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])  # anchors
            tcls.append(c)  # class

        return tcls, tbox, indices, anch
总结
以上是生活随笔为你收集整理的u版yolov3详解 --- loss 部分的全部内容,希望文章能够帮你解决所遇到的问题。
如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。