CornerNet:实现demo、可视化heatmap、测试各类别精度
CornerNet:實現demo、可視化heatmap、測試各類別精度
文章目錄
- CornerNet:實現demo、可視化heatmap、測試各類別精度
- 前言
- 實現demo
- 方案一
- 方案二
- 可視化heatmap
- 測試各類別精度
歡迎大家來討論關于CornerNet的代碼,一起交流,直接評論就好了,我很快回的,不信你試試???
前言
有段時間沒有整理自己最近在研究的東西了,但還是感覺時不時要停下來總結一下才能記憶深刻。最近一直在搞Anchor-free的二維目標檢測算法(CornerNet\CornerNet-Lite\CenterNet\CenterNet…),接觸比較多的還是CornerNet(ECCV2018),詳情請看CornerNet論文解讀和CornerNet配置。
今天要說的是CornerNet代碼中并沒有類似demo.py這樣的調用模型畫出檢測結果的代碼(好像是有個debug參數,但是我沒試過),還有就是整篇都在說heatmap,但是不知道heatmap長什么樣,最后還有實驗結果的分析不夠豐富,所以增加了多類別的精度輸出。
實現demo
這里給出兩種方案:
方案一
直接在github上找到了別人實現的demo.py。雖然這個能解決CornerNet的demo問題,但是我覺得還是有必要了解下第二種方法。
#!/usr/bin/env python
"""Demo script for CornerNet.

Runs the trained network on a single image or on every image in a folder,
applies per-class soft-NMS, and writes annotated result images to disk.
"""
import os
import json
import torch
import pprint
import argparse
import importlib
import numpy as np
import cv2
import matplotlib
matplotlib.use("Agg")

from config import system_configs
from nnet.py_factory import NetworkFactory
from utils import crop_image, normalize_
from external.nms import soft_nms, soft_nms_merge

torch.backends.cudnn.benchmark = False

# COCO class names; index 0 is the background class.
class_name = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
image_ext = ['jpg', 'jpeg', 'png']


def parse_args():
    """Parse the command-line arguments of the demo."""
    parser = argparse.ArgumentParser(description="Demo CornerNet")
    parser.add_argument("--demo", dest="demo",
                        help="demo image or image folder",
                        default="", type=str)
    parser.add_argument("--cfg_file", help="config file",
                        default='CornerNet', type=str)
    parser.add_argument("--testiter", dest="testiter",
                        help="test at iteration i",
                        default=None)
    parser.add_argument("--suffix", dest="suffix", default=None, type=str)
    args = parser.parse_args()
    return args


def _rescale_dets(detections, ratios, borders, sizes):
    """Map detections from network-output coordinates back onto the
    original image, in place (undo ratio scaling and crop borders, then
    clip to the image size)."""
    xs, ys = detections[..., 0:4:2], detections[..., 1:4:2]
    xs /= ratios[:, 1][:, None, None]
    ys /= ratios[:, 0][:, None, None]
    xs -= borders[:, 2][:, None, None]
    ys -= borders[:, 0][:, None, None]
    np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs)
    np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys)


def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3, debug=False):
    """Run the network on a batch and return decoded detections as numpy."""
    detections = nnet.test([images], ae_threshold=ae_threshold, K=K, kernel=kernel)
    detections = detections.data.cpu().numpy()
    return detections


if __name__ == "__main__":
    args = parse_args()

    if args.suffix is None:
        cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + ".json")
    else:
        cfg_file = os.path.join(system_configs.config_dir,
                                args.cfg_file + "-{}.json".format(args.suffix))
    print("cfg_file: {}".format(cfg_file))

    with open(cfg_file, "r") as f:
        configs = json.load(f)
    configs["system"]["snapshot_name"] = args.cfg_file
    system_configs.update_config(configs["system"])
    print("system config...")
    pprint.pprint(system_configs.full)

    test_iter = system_configs.max_iter if args.testiter is None \
        else args.testiter
    print("loading parameters at iteration: {}".format(test_iter))
    print("building neural network...")
    nnet = NetworkFactory(None)
    print("loading parameters...")
    nnet.load_params(test_iter)
    nnet.cuda()
    nnet.eval_mode()

    K = configs["db"]["top_k"]
    ae_threshold = configs["db"]["ae_threshold"]
    nms_kernel = 3
    scales = configs["db"]["test_scales"]
    weight_exp = 8
    merge_bbox = False
    categories = configs["db"]["categories"]
    nms_threshold = configs["db"]["nms_threshold"]
    max_per_image = configs["db"]["max_per_image"]
    nms_algorithm = {"nms": 0, "linear_soft_nms": 1, "exp_soft_nms": 2}["exp_soft_nms"]
    mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32)
    std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32)
    top_bboxes = {}

    # Collect image paths: a single file, or every image file in a folder.
    if os.path.isdir(args.demo):
        image_names = []
        for file_name in os.listdir(args.demo):
            ext = file_name[file_name.rfind('.') + 1:].lower()
            if ext in image_ext:
                image_names.append(os.path.join(args.demo, file_name))
    else:
        image_names = [args.demo]

    for image_id, image_name in enumerate(image_names):
        image = cv2.imread(image_name)
        height, width = image.shape[0:2]
        detections = []
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            # `| 127` pads each dimension so that (size + 1) is a multiple of 128
            inp_height = new_height | 127
            inp_width = new_width | 127
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            # network output is downsampled 4x relative to the input
            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, mean, std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            # test-time flip augmentation: batch = [image, horizontally flipped image]
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = kp_decode(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel, debug=True)
            dets = dets.reshape(2, -1, 8)
            # mirror the x-coordinates of detections from the flipped copy back
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets = dets.reshape(1, -1, 8)
            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)
        detections = np.concatenate(detections, axis=1)
        classes = detections[..., -1][0]
        detections = detections[0]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        # per-class soft-NMS; class ids in `top_bboxes` are 1-based
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        # keep at most `max_per_image` detections over all classes
        scores = np.hstack([top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        # draw boxes with score > 0.5 on a fresh copy of the image and save it
        image = cv2.imread(image_name)
        for j in range(1, categories + 1):
            keep_inds = (top_bboxes[image_id][j][:, -1] > 0.5)
            cat_name = class_name[j]
            cat_size = cv2.getTextSize(cat_name + '0.0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
            # random pastel color per category
            color = np.random.random((3, )) * 0.6 + 0.4
            color = (color * 255).astype(np.int32).tolist()
            for bbox in top_bboxes[image_id][j][keep_inds]:
                sc = bbox[4]
                bbox = bbox[0:4].astype(np.int32)
                txt = '{}{:.1f}'.format(cat_name, sc)
                if bbox[1] - cat_size[1] - 2 < 0:
                    # label would fall above the image border: draw it inside the box
                    cv2.rectangle(image,
                                  (bbox[0], bbox[1] + 2),
                                  (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                                  color, -1)
                    cv2.putText(image, txt, (bbox[0], bbox[1] + cat_size[1] + 2),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
                else:
                    cv2.rectangle(image,
                                  (bbox[0], bbox[1] - cat_size[1] - 2),
                                  (bbox[0] + cat_size[0], bbox[1] - 2),
                                  color, -1)
                    cv2.putText(image, txt, (bbox[0], bbox[1] - 2),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
                cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # NOTE(review): hard-coded output directory kept from the original;
        # adjust to your own environment.
        cv2.imwrite("/home/jhsu/lyj/CornerNet/demo_result/" + str(image_id + 1) + ".jpg", image)
方案二
上述方案能夠解決CornerNet的demo問題。但如果說最終目的是發paper,那么則需要多種算法對比,也就是需要多條baseline,如果每條baseline都要跑一些demo,那么上述的方案就不能通用了,比如說CenterNet、Faster R-CNN等就沒辦法直接套上述代碼,因此方案二通用性更高。
我用的是第二種方法:測試代碼先跑測試集–>原先代碼自動生成result.json–>python腳本json轉txt–>python腳本讀取txt將坐標信息畫回測試集圖片。(這個流程是針對CornerNet的,其余算法不一定有result.json,不過沒有關系,總之總的流程就是:測試代碼先跑測試集-->修改算法代碼將測試結果保存為txt-->python腳本讀取txt將坐標信息畫回測試集圖片)接下來一步一步分析。
-
測試代碼先跑測試集
這個就直接運行測試代碼,但是這里建議寫個shell腳本,比如叫test.sh,這樣的好處有:(1). 能夠保存終端輸出的信息,因為測試到最后會有mAP輸出,別到時候稍不留神就沒注意到,就白跑了,所以直接將終端輸出的信息保存下來穩。(2). 有些測試命令賊長,每次都要輸入很長的話那誰頂得住,有這個test.sh,方便很多:
#!/usr/bin/env bash NETWORK="yjl_CornerNet" MODELDIR="./yjl_test_CornerNet/test-$NETWORK-`date +%Y-%m-%d-%H-%M-%S`" mkdir -p "$MODELDIR" # 這里填迭代幾次的模型跑測試 iter=435000 LOGFILE="$MODELDIR/log-iter-$iter-$NETWORK-`date +%Y-%m-%d-%H-%M-%S`.log"# test model python test.py CornerNet --testiter $iter 2>&1 | tee $LOGFILE例如上述代碼命名為test.sh,那么直接在終端
./test.sh運行就好,運行之后,你會看見自動創建了目錄存儲了測試日志:之后你每次運行./test.sh都會自動創建新的log,記錄測試過程:
-
原先代碼自動生成result.json
測試結束后,自動生成的result.json
這個json就是存儲數據的一種格式,這個打開要挺久的,里面放的就是測試結果,一堆框的信息啥的,沒啥打開的必要,直接進行下一步。 -
python腳本json轉txt
直接同個路徑下建個python腳本,鍵入:
import jsonf = open("results.json", 'r') arr = json.loads(f.read()) f.close()f = open("result.txt", 'w') for i in arr:f.write("%08d %d %f %f %f %f %f\n" % (i['image_id'], i['category_id'], i['score'],i['bbox'][0], i['bbox'][1], i['bbox'][2] + i['bbox'][0], i['bbox'][3] + i['bbox'][1])) f.close()這樣就會將result.json轉化成result.txt,轉化的意義在于我更方便用python處理這些格式。
-
python腳本讀取txt將坐標信息畫回測試集圖片
到此為止,你已經有了CornerNet算法的檢測結果(result.txt),然后還要剛才測試的測試集的圖片,有兩個東西之后,就差個下面的腳本了,當然還是先看下txt長啥樣:
# 圖片名 類別id 置信度 x1 y1 x2 y2 00000001 0 0.300000 466.450000 138.720000 502.650000 206.690000 00000001 0 0.150000 466.450000 138.720000 498.860000 170.720000 00000001 0 0.080000 466.530000 154.640000 486.630000 198.660000 00000001 0 0.030000 466.600000 138.700000 502.630000 174.680000 00000001 0 0.020000 243.160000 159.020000 511.140000 166.620000
然后是畫框的腳本
# coding:utf-8
'''
Read the detection txt file produced by testing and draw each box back
onto the corresponding source image.
'''
import cv2
import numpy as np
import os

# category_id -> (label text, label background width in px, BGR color).
# Adjust this table to match your own dataset's category ids.
_CATEGORIES = {
    '5': ("Pedestrian", 100, (0, 255, 255)),
    '0': ("Rider", 50, (0, 255, 0)),
    '3': ("Electromobile", 130, (0, 0, 255)),
    '6': ("Bike", 40, (255, 255, 0)),
    '2': ("Motorbike", 90, (172, 172, 0)),
    '4': ("Rider_trunc", 110, (172, 0, 172)),
    '1': ("Hat", 35, (255, 0, 255)),
}


def drawBBox(txt_path, img_path, save_path, first_img):
    """Draw the boxes listed in *txt_path* onto the images under *img_path*.

    Each txt line is: image_name category_id score x1 y1 x2 y2.
    Only boxes with score >= 0.5 are drawn; annotated images are written
    to *save_path*. *first_img* is kept for backward compatibility and is
    no longer needed (the first image is detected automatically).
    """
    img_id = None   # name of the image currently being annotated
    with open(txt_path, 'r') as fp:
        while True:
            line = fp.readline()
            if not line:
                print("txt is over!!!")
                break
            fields = line.split()
            x = round(float(fields[3]))
            y = round(float(fields[4]))
            w = round(float(fields[5]))
            h = round(float(fields[6]))
            # Bug fix: the score must NOT be rounded before thresholding.
            # round() collapsed it to 0/1, so 0.4 was rejected, 0.6 accepted
            # and 0.5 rejected (banker's rounding) — not a 0.5 cut-off at all.
            ap = float(fields[2])
            if ap >= 0.5:
                if fields[0] != img_id:
                    # new image: start from the clean source picture
                    img = cv2.imread(img_path + fields[0] + ".jpg")
                else:
                    # same image: keep accumulating boxes on the saved copy.
                    # (Bug fix: the original re-read the clean source for every
                    # box of the first image, so only its last box survived.)
                    img = cv2.imread(save_path + fields[0] + ".jpg")
                cat = _CATEGORIES.get(fields[1])
                if cat is not None:
                    label, box_w, color = cat
                    # filled label background above the box, then the label text
                    cv2.rectangle(img, (x, y - 22), (x + box_w, y), color, thickness=-1)
                    cv2.putText(img, label, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0))
                    # the bounding box itself ((x,y) top-left, (w,h) bottom-right)
                    cv2.rectangle(img, (x, y), (w, h), color, 3, 4, 0)
                img_id = fields[0]
                cv2.imwrite(save_path + img_id + ".jpg", img)
                print(fields[0] + ".jpg is save....OK!!!")


if __name__ == '__main__':
    # path of the detection txt file
    txt_path = "./lsm-CornerNet-Lite.txt"
    # path of the original test images
    img_path = "./test/"
    # where the annotated images are saved
    save_path = "./result/"
    # name of the first test-set image (kept for compatibility, unused)
    first_img = "00000001"
    drawBBox(txt_path, img_path, save_path, first_img)
    print("All Done....")
中間一大段的if…elif…是txt中類別id對應的類別名,這個根據自身情況簡單修改下就好。
最終實現的效果就是測試集所有的圖片都被畫上了bbox(每種顏色的框對應不同的類別):
可視化heatmap
這里參考大佬的復現:可視化heatmap,直接按照鏈接來就能達到效果,里面碰到的第一步對demo.py進行修改,這里的demo.py就是上述方案一的代碼。接著我對visualize.py進行了注釋:
# -*- coding: utf-8 -*-
import numpy as np
import cv2
import torchdef visualize(image, tl_heat, br_heat):# image'size = [2, 3, 1151, 2047]# 這個圖片的原始尺寸下采樣4倍,就變成heatmap的維度,所以測試時不一定是128# [2, 7, 288, 512],這個是不定的,看圖片大小tl_heat = torch.sigmoid(tl_heat)# [2, 7, 288, 512]br_heat = torch.sigmoid(br_heat)# 這個colors是一個list,shape為(7, 1, 1, 3),7是類別數,1,1,3是隨機random的# 這個作用就是給每個類定制了專屬的隨機生成的顏色,大概長下面這樣'''[array([[[105, 131, 151]]], dtype=uint8),array([[[180, 216, 153]]], dtype=uint8),array([[[151, 150, 167]]], dtype=uint8),array([[[188, 236, 177]]], dtype=uint8),array([[[111, 143, 220]]], dtype=uint8),array([[[240, 194, 238]]], dtype=uint8),array([[[207, 136, 124]]], dtype=uint8)]'''colors = [((np.random.random((1, 1, 3)) * 0.6 + 0.4)*255).astype(np.uint8)\for _ in range(tl_heat.shape[1])]# tl_heat[0] size = [7, 288, 512]# 取走第一個batch的特征,配上顏色# tl_hm、br_hm的維度均是[h, w, 3]tl_hm = _gen_colormap(tl_heat[0].detach().cpu().numpy(), colors)br_hm = _gen_colormap(br_heat[0].detach().cpu().numpy(), colors)# 標準差和均值mean = np.array([0.40789654, 0.44719302, 0.47026115],dtype=np.float32).reshape(3, 1, 1)std = np.array([0.28863828, 0.27408164, 0.27809835],dtype=np.float32).reshape(3, 1, 1)# 為rgb的圖片,每通道乘上標準差加上均值,相當于每通道分配一個數字img = (image[0].detach().cpu().numpy() * std + mean) * 255# 再把圖片transpose成標準的樣子img = img.astype(np.uint8).transpose(1, 2, 0)tl_blend = _blend_img(img, tl_hm)br_blend = _blend_img(img, br_hm)cv2.imwrite("./tl_heatmap.jpg", tl_blend)cv2.imwrite("./br_heatmap.jpg", br_blend)print("~~~save heatmaps OK!")def _gen_colormap(heatmap, colors):# 這個heatmap的維度是[7, 288, 512]num_classes = heatmap.shape[0]h, w = heatmap.shape[1], heatmap.shape[2]color_map = np.zeros((h, w, 3), dtype=np.uint8)for i in range(num_classes):# np.maximum是兩個輸入進行對比,每次誰大就挑誰的,維度要一致# color_map維度[h, w, 3]# heatmap[i, :, :, np.newaxis]維度[h, w, 1]# colors[i]維度[1, 1, 3]# 最終右邊這一長串其實是0-255的整型數字# 接著循環類別次,color_map一直更新,每次挑maximum的color_map = np.maximum(color_map, (heatmap[i, :, :, np.newaxis] * colors[i]).astype(np.uint8))return color_mapdef _blend_img(back, fore, trans=0.7):'''back = 
img-->[h*4, w*4, 3]fore = tl_hm-->[h, w, 3]'''if fore.shape[0] != back.shape[0] or fore.shape[0] != back.shape[1]:fore = cv2.resize(fore, (back.shape[1], back.shape[0]))if len(fore.shape) == 2:fore = fore.reshape(fore.shape[0], fore.shape[1], 1)# 兩幅圖像進行合并時,按公式:blended_img = img1 * (1 – alpha) + img2* alpha 進行ret = (back * (1. - trans) + fore * trans).astype(np.uint8)# 別越界了,ret的大小就是原圖的大小ret[ret > 255] = 255return ret
測試各類別精度
像這種訓練COCO數據集的,測試完只能輸出類似這樣的評估指標:
能夠輸出mAP的信息,雖然很多精度,可是各個類別的AP值卻沒有,因此訓練多類別時,這樣的信息還不夠,我們更想要的是各個類別的具體AP值,這樣才能分析哪個類別檢測難度大,哪個類別精度高等等。因此需要修改下代碼:
在~/CornerNet/db/coco.py中,直接加入新的函數:
def _print_detection_eval_metrics(self, coco_eval):IoU_lo_thresh = 0.5IoU_hi_thresh = 0.95def _get_thr_ind(coco_eval, thr):ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &(coco_eval.params.iouThrs < thr + 1e-5))[0][0]iou_thr = coco_eval.params.iouThrs[ind]assert np.isclose(iou_thr, thr)return indind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)# precision has dims (iou, recall, cls, area range, max dets)# area range index 0: all area ranges# max dets index 2: 100 per imageprecision = \coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]ap_default = np.mean(precision[precision > -1])print(('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ''~~~~').format(IoU_lo_thresh, IoU_hi_thresh))# print("")print('MAP:{:.1f}'.format(100 * ap_default))for cls_ind, cls in enumerate(self._classes):if cls == '__background__':continue# minus 1 because of __background__# cat_name = db.class_name(cls_ind)# print(cat_name)cat_name = self.class_name(cls)# print(cat_name+":")precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind, 0, 2]ap = np.mean(precision[precision > -1])print(cat_name+':{:.1f}'.format(100 * ap))
接著直接Ctrl F在代碼中定位到coco_eval.evaluate()這一行,在這一行下面新增加兩行:
coco_eval.accumulate()
self._print_detection_eval_metrics(coco_eval)
之后重新跑測試的時候,最終就能夠輸出各個類別的AP值:
總結
以上是生活随笔為你收集整理的CornerNet:实现demo、可视化heatmap、测试各类别精度的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: TensorFlow学习笔记——实现经典
- 下一篇: CornerNet代码解析——损失函数