
MobileNetV2-SSDLite Code Analysis 6: VOC Dataset


class VOCDataset:

Initialization

This part mainly records the directories of the various files and determines class_names.

# module-level imports used here: pathlib, os, logging
def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
    """Dataset for VOC data.
    Args:
        root: the root of the VOC2007 or VOC2012 dataset; the directory contains the following sub-directories:
            Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
    """
    self.root = pathlib.Path(root)
    self.transform = transform
    self.target_transform = target_transform
    if is_test:
        image_sets_file = self.root / "ImageSets/Main/test.txt"
    else:
        image_sets_file = self.root / "ImageSets/Main/trainval.txt"
    self.ids = VOCDataset._read_image_ids(image_sets_file)  # the ID of every image
    self.keep_difficult = keep_difficult

    # if the labels file exists, read in the class names
    label_file_name = self.root / "labels.txt"
    if os.path.isfile(label_file_name):
        class_string = ""
        with open(label_file_name, 'r') as infile:
            for line in infile:
                class_string += line.rstrip()
        # classes should be a comma separated list
        classes = class_string.split(',')
        # prepend BACKGROUND as first class
        classes.insert(0, 'BACKGROUND')
        classes = [elem.replace(" ", "") for elem in classes]
        self.class_names = tuple(classes)
        logging.info("VOC Labels read from file: " + str(self.class_names))
    else:
        logging.info("No labels file, using default VOC classes.")
        self.class_names = ('BACKGROUND',
                            'aeroplane', 'bicycle', 'bird', 'boat',
                            'bottle', 'bus', 'car', 'cat', 'chair',
                            'cow', 'diningtable', 'dog', 'horse',
                            'motorbike', 'person', 'pottedplant',
                            'sheep', 'sofa', 'train', 'tvmonitor')
    self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}
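As a quick sanity check, here is a minimal usage sketch. The dataset path is hypothetical, and the import path assumes the layout of the pytorch-ssd style repo this series analyzes:

from vision.datasets.voc_dataset import VOCDataset  # import path assumed

# hypothetical location of an unpacked VOC2007 dataset
dataset = VOCDataset("/data/VOCdevkit/VOC2007", is_test=False)
print(len(dataset.ids))         # number of trainval images
print(dataset.class_names[:3])  # ('BACKGROUND', 'aeroplane', 'bicycle')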

__getitem__

image_id is the file name of the index-th image. From it we read the boxes, labels and image data, then apply the corresponding transforms.

def __getitem__(self, index):
    image_id = self.ids[index]
    # boxes: (num_of_objects_per_image, 4), labels: (num_of_objects_per_image,)
    boxes, labels, is_difficult = self._get_annotation(image_id)
    if not self.keep_difficult:
        boxes = boxes[is_difficult == 0]
        labels = labels[is_difficult == 0]
    # image: the cv2 data of the index-th image
    image = self._read_image(image_id)
    if self.transform:
        image, boxes, labels = self.transform(image, boxes, labels)
    # labels is still (num_of_objects_per_image,); the shape is unchanged
    if self.target_transform:
        boxes, labels = self.target_transform(boxes, labels)
    # labels is now torch.Size([3000]): the shapes are unified across images,
    # so target_transform is the crucial step
    return image, boxes, labels
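To make the shape flow concrete, a sketch of fetching one sample (the 3000-prior count is taken from the comment above and depends on the concrete SSD configuration):

# Sketch: assumes dataset was built with transform and target_transform
# set as in training, and a prior count of 3000.
image, boxes, labels = dataset[0]
# image  -> torch.Size([3, 300, 300])
# boxes  -> torch.Size([3000, 4])   one regression target per prior
# labels -> torch.Size([3000])      one class id per prior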

Reading the image IDs

# the input is the path of test.txt (or trainval.txt); the file names are collected into ids
@staticmethod
def _read_image_ids(image_sets_file):
    ids = []
    with open(image_sets_file) as f:
        for line in f:
            ids.append(line.rstrip())
    return ids

Reading the annotations

Based on image_id we locate the annotation_file, parse the XML and extract each bndbox. (Assume the image contains num_of_objects_per_image objects.) The returned values are:

boxes (box coordinates): (num_of_objects_per_image, 4)

labels (class index of each object): (num_of_objects_per_image,)

is_difficult: (num_of_objects_per_image,). In VOC, difficult=1 marks objects considered hard to recognize; when keep_difficult is False, they are filtered out in __getitem__.

# assumes: import xml.etree.ElementTree as ET, import numpy as np
def _get_annotation(self, image_id):
    annotation_file = self.root / f"Annotations/{image_id}.xml"
    objects = ET.parse(annotation_file).findall("object")
    boxes = []
    labels = []
    is_difficult = []
    for object in objects:
        class_name = object.find('name').text.lower().strip()
        # we're only concerned with classes in our list
        if class_name in self.class_dict:
            bbox = object.find('bndbox')
            # VOC annotations follow Matlab's 1-based indexing,
            # so subtract 1 to get 0-based pixel coordinates
            x1 = float(bbox.find('xmin').text) - 1
            y1 = float(bbox.find('ymin').text) - 1
            x2 = float(bbox.find('xmax').text) - 1
            y2 = float(bbox.find('ymax').text) - 1
            boxes.append([x1, y1, x2, y2])
            labels.append(self.class_dict[class_name])
            is_difficult_str = object.find('difficult').text
            is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)
    return (np.array(boxes, dtype=np.float32),
            np.array(labels, dtype=np.int64),
            np.array(is_difficult, dtype=np.uint8))
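To see exactly what _get_annotation consumes, here is a self-contained sketch that parses a minimal hand-written VOC-style XML snippet (the content is invented for illustration):

import xml.etree.ElementTree as ET

# A minimal stand-in for Annotations/<image_id>.xml.
xml_text = """
<annotation>
  <object>
    <name>dog</name>
    <difficult>0</difficult>
    <bndbox><xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax></bndbox>
  </object>
</annotation>
"""

for obj in ET.fromstring(xml_text).findall("object"):
    bbox = obj.find("bndbox")
    # the same 1-based -> 0-based shift as in _get_annotation
    box = [float(bbox.find(tag).text) - 1 for tag in ("xmin", "ymin", "xmax", "ymax")]
    print(obj.find("name").text, box)  # dog [47.0, 239.0, 194.0, 370.0]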

Reading the image data

Based on image_id, locate the image file, read it, and convert the color space (cv2.imread returns BGR, so it is converted to RGB).

# assumes: import cv2
def _read_image(self, image_id):
    image_file = self.root / f"JPEGImages/{image_id}.jpg"
    image = cv2.imread(str(image_file))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image

Processing of labels and bboxes

As seen above, there are two processing steps, one transform and one target_transform:

if self.transform:
    image, boxes, labels = self.transform(image, boxes, labels)
if self.target_transform:
    boxes, labels = self.target_transform(boxes, labels)

train_transform: TrainAugmentation

Let us analyze transform first. It is defined as follows.

train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std)

if self.transform:
    image, boxes, labels = self.transform(image, boxes, labels)

The definition of TrainAugmentation is below. The pattern is interesting: every transform composed in augment takes (self, img, boxes, labels) as input and returns (img, boxes, labels).

Input shapes:

boxes (num_of_objects_per_image, 4)

labels (num_of_objects_per_image,)

The output shapes are unchanged.

class TrainAugmentation:
    def __init__(self, size, mean=0, std=1.0):
        """
        Args:
            size: the size of the final image.
            mean: mean pixel value per channel.
        """
        self.mean = mean
        self.size = size
        self.augment = Compose([
            ConvertFromInts(),     # convert the image to np.float32
            PhotometricDistort(),  # photometric (color/brightness) distortion
            Expand(self.mean),
            RandomSampleCrop(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            SubtractMeans(self.mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, img, boxes, labels):
        """
        Args:
            img: the output of cv2.imread in RGB layout.
            boxes: bounding boxes in the form of (x1, y1, x2, y2).
            labels: labels of boxes.
        """
        return self.augment(img, boxes, labels)
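A hedged usage sketch of TrainAugmentation; the size/mean/std values are illustrative stand-ins for config.image_size, config.image_mean and config.image_std:

import numpy as np

# Illustrative values standing in for the config module.
train_transform = TrainAugmentation(300, np.array([127, 127, 127]), 128.0)

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # fake RGB image
boxes = np.array([[100.0, 120.0, 300.0, 360.0]], dtype=np.float32)
labels = np.array([12])
image, boxes, labels = train_transform(image, boxes, labels)
# image is now a (3, 300, 300) tensor; boxes are in percent coordinates,
# and the (N, 4) / (N,) shapes are preserved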

target_transform: MatchPrior

The target_transform is defined as follows. Its job is to convert the annotated data into the regression targets we need for the anchors.

config.priors: (num_priors, 4)

target_transform = MatchPrior(config.priors, config.center_variance,
                              config.size_variance, 0.5)

The code of MatchPrior is below (ssd.py). __init__ initializes the parameters; the __call__ pattern is worth learning from. Every subsequent call goes through this path, turning boxes into the locations we need.

class MatchPrior(object):
    def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold):
        self.center_form_priors = center_form_priors
        self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors)
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.iou_threshold = iou_threshold

    def __call__(self, gt_boxes, gt_labels):
        if type(gt_boxes) is np.ndarray:
            gt_boxes = torch.from_numpy(gt_boxes)
        if type(gt_labels) is np.ndarray:
            gt_labels = torch.from_numpy(gt_labels)
        boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
                                                self.corner_form_priors, self.iou_threshold)  # the key matching step
        boxes = box_utils.corner_form_to_center_form(boxes)
        locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
        return locations, labels
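For reference, convert_boxes_to_locations applies the standard SSD encoding. A minimal sketch of that formula, which the repo's box_utils should match up to broadcasting details:

import torch

def encode_sketch(center_form_boxes, center_form_priors, center_variance, size_variance):
    # Standard SSD offsets: the center shift is normalized by the prior size,
    # the size is a log-scale ratio, and each part is divided by its variance.
    return torch.cat([
        (center_form_boxes[..., :2] - center_form_priors[..., :2])
        / center_form_priors[..., 2:] / center_variance,
        torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:])
        / size_variance,
    ], dim=-1)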

At this point I had a question: each image can contain several boxes and labels. Although the author extracts them all, the per-image counts differ, so a batch of shape [batch_size, boxes_size, labels_size] would have non-uniform lengths, and at first it was unclear to me how batching works. (Addendum 2019-09-24: target_transform, i.e. MatchPrior, is what unifies the lengths.)

Note also that this VOCDataset does not inherit from torch.utils.data.Dataset.
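Because MatchPrior gives every sample identically shaped targets, the default collate function can stack a batch without custom padding, even though the class only implements the dataset protocol. A sketch, assuming the full class also defines __len__ (the repo version does) and the transforms from above:

from torch.utils.data import DataLoader

dataset = VOCDataset("/data/VOCdevkit/VOC2007",   # hypothetical path
                     transform=train_transform,
                     target_transform=target_transform)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
images, boxes, labels = next(iter(loader))
# images: (32, 3, 300, 300), boxes: (32, 3000, 4), labels: (32, 3000)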

The code of assign_priors is as follows.

def assign_priors(gt_boxes, gt_labels, corner_form_priors,
                  iou_threshold):
    """Assign ground truth boxes and targets to priors.
    Args:
        gt_boxes (num_targets, 4): ground truth boxes.
        gt_labels (num_targets): labels of targets.
        corner_form_priors (num_priors, 4): corner form priors.
    Returns:
        boxes (num_priors, 4): real values for priors.
        labels (num_priors): labels for priors.
    """
    # size: num_priors x num_targets; iou_of is the author's helper computing the overlap (IoU)
    ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))
    # size: num_priors
    best_target_per_prior, best_target_per_prior_index = ious.max(1)
    # size: num_targets
    best_prior_per_target, best_prior_per_target_index = ious.max(0)
    for target_index, prior_index in enumerate(best_prior_per_target_index):
        best_target_per_prior_index[prior_index] = target_index
    # 2.0 is used to make sure every target has a prior assigned
    best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)
    # size: num_priors
    labels = gt_labels[best_target_per_prior_index]
    labels[best_target_per_prior < iou_threshold] = 0  # the background id
    boxes = gt_boxes[best_target_per_prior_index]
    return boxes, labels
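A toy walk-through of the matching logic, with an inline IoU helper standing in for the repo's iou_of (a sketch, not the repo's exact code):

import torch

def iou_sketch(boxes0, boxes1, eps=1e-5):
    # corner-form boxes with broadcastable shapes (..., 4)
    lt = torch.max(boxes0[..., :2], boxes1[..., :2])
    rb = torch.min(boxes0[..., 2:], boxes1[..., 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    area0 = (boxes0[..., 2] - boxes0[..., 0]) * (boxes0[..., 3] - boxes0[..., 1])
    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    return inter / (area0 + area1 - inter + eps)

gt_boxes = torch.tensor([[0.0, 0.0, 0.5, 0.5]])  # one target
gt_labels = torch.tensor([7])
priors = torch.tensor([[0.0, 0.0, 0.4, 0.4],     # overlaps the target well
                       [0.6, 0.6, 1.0, 1.0]])    # no overlap at all

ious = iou_sketch(gt_boxes.unsqueeze(0), priors.unsqueeze(1))  # (num_priors, num_targets)
best_target_per_prior, _ = ious.max(1)
print(best_target_per_prior)  # tensor([0.6400, 0.0000])
# With iou_threshold=0.5, prior 0 keeps label 7 while prior 1 falls below
# the threshold and is assigned the background label 0.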
