當前位置：首頁 > 编程语言 > asp.net >内容正文

asp.net

XML--视频--人脸VOC

發布時間：2025/3/21 asp.net 46 豆豆

生活随笔收集整理的這篇文章主要介紹了 XML--视频--人脸VOC 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

http://www.2cto.com/kf/201702/602665.html

（1）先把視頻保存成圖片（2）再獲取每張中人臉的坐標信息，保存到output.txt中（3）利用output.txt生成XML文件

第一步：從視頻中獲得包括人臉的圖像

/*
 * Step 1: read a video frame by frame. Whenever at least one face is
 * detected in a frame, save that frame to disk as "<n>.jpg"; frames without
 * a face are not saved.
 */
#include <opencv2/opencv.hpp>
#include <iostream>
#include <stdio.h>
#include <fstream>
#include <string>
#include <vector>

using namespace cv;   // needed so the OpenCV names below resolve unqualified
using namespace std;

int img_num = 1;                 // number used for the next saved image
char img_name[60] = { 0 };       // buffer holding the output file name
string face_cascade_name = "haarcascade_frontalface_alt.xml";
CascadeClassifier face_cascade;
string window_name = "人臉識別";

/*
 * Runs the Haar cascade on `frame`. If any face is found the untouched frame
 * is written to "<img_num>.jpg" and img_num is advanced. The detections are
 * then drawn onto the frame and the result is shown in a window.
 */
void detectAndDisplay(Mat frame)
{
    std::vector<Rect> faces;
    face_cascade.detectMultiScale(frame, faces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));

    // Save only frames that actually contain a face. The counter advances
    // only on a save so the files are numbered 1, 2, 3, ... without gaps,
    // which is what the second step (it stops at the first missing file)
    // relies on.
    if (!faces.empty())
    {
        sprintf(img_name, "%d.jpg", img_num);
        img_num++;
        imwrite(img_name, frame);   // written before the boxes are drawn
    }

    for (size_t i = 0; i < faces.size(); i++)
    {
        rectangle(frame, faces[i], Scalar(255, 0, 0), 2, 8, 0);
    }
    imshow(window_name, frame);
}

int main()
{
    VideoCapture cap("1.avi");   // open the input video file
    if (!cap.isOpened())         // give up if the video cannot be opened
        return -1;

    if (!face_cascade.load(face_cascade_name))
    {
        printf("[error] 無法加載級聯分類器文件！\n");
        return -1;
    }

    for (;;)
    {
        Mat frame;
        cap >> frame;            // fetch the next frame
        if (frame.empty())
        {
            // No more frames: the video is finished. Retrying here would
            // spin forever on a file source.
            break;
        }
        detectAndDisplay(frame);
        if (waitKey(24) >= 0)    // any key press stops processing
            break;
    }
    return 0;
}

=====================================

第二步：獲得output.txt文件

/*
 * Step 2: read the saved images one after another and run face detection on
 * each. Every detected face is appended to output.txt as
 *     <image name> person <xmin> <ymin> <xmax> <ymax>
 * and the name of every image without a detection is appended to
 * no_face.txt.
 */
#include <opencv2/opencv.hpp>
#include <iostream>
#include <stdio.h>
#include <fstream>
#include <vector>

using namespace std;
using namespace cv;

void detectAndDisplay(Mat frame);

String face_cascade_name = "haarcascade_frontalface_alt.xml";
CascadeClassifier face_cascade;                    // face classifier
ofstream file_write("output.txt", ios::app);       // one line per detected face (append mode)
ofstream file_no_face("no_face.txt", ios::app);    // names of images with no detected face (append mode)
char buf[60] = { 0 };                              // bare image file name, e.g. "12.jpg"
char buf_path[60] = { 0 };                         // full path of the image being processed
int i_img = 1;                                     // index of the image being processed

int main(void)
{
    const int count_img = 100000;   // upper bound on the number of input images

    // The classifier is loaded once, before the loop, instead of being
    // re-read from disk for every image.
    if (!face_cascade.load(face_cascade_name))
    {
        printf("加載XML文件失敗\n");
        return -1;
    }

    for (; i_img <= count_img; i_img++)
    {
        sprintf(buf, "%d.jpg", i_img);
        // Replace "G:\\img\\" with the absolute path of your image folder.
        sprintf(buf_path, "G:\\img\\%d.jpg", i_img);

        Mat frame = imread(buf_path);
        if (frame.empty())
        {
            // The first image that cannot be loaded ends the run.
            printf("處理完成 | 第%d張圖片加載失敗\n", i_img);
            break;
        }
        detectAndDisplay(frame);
    }
    waitKey(0);
    return 0;
}

/*
 * Detects faces in `frame` (the image whose name is currently in `buf`),
 * records the result in output.txt / no_face.txt, draws the detections and
 * shows the frame for 300 ms.
 */
void detectAndDisplay(Mat frame)
{
    std::vector<Rect> faces;
    face_cascade.detectMultiScale(frame, faces, 1.1, 3, CV_HAAR_DO_ROUGH_SEARCH, Size(70, 70), Size(100, 100));

    if (faces.empty())
    {
        file_no_face << buf << "\n";
    }

    for (size_t j = 0; j < faces.size(); j++)
    {
        rectangle(frame, faces[j], Scalar(255, 0, 0), 2, 8, 0);

        // Image name, class label, then the top-left and bottom-right
        // corners of the bounding box.
        char output[100] = { 0 };
        sprintf(output, "%s person %d %d %d %d\n", buf,
                faces[j].tl().x, faces[j].tl().y, faces[j].br().x, faces[j].br().y);
        printf("%s", output);
        file_write << output;
    }
    imshow("正在處理", frame);
    waitKey(300);
}

第三步：產生XML文件

將第2步得到的txt轉成xml。格式如下：(注意folder字段) <annotation><folder>logos</folder> <filename>000001.jpg</filename><source><database>The logs Database</database><annotation>The logs Database</annotation><image>flickr</image><flickrid>0</flickrid></source><owner><flickrid>I do not know</flickrid><name>I do not know</name></owner><size><width>293</width><height>220</height><depth>3</depth></size><segmented>0</segmented><object><name>光頭</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin>157</xmin><ymin>24</ymin><xmax>241</xmax><ymax>99</ymax></bndbox></object> </annotation>

【matlab代碼】
下載工具鏈接：http://pan.baidu.com/s/1nuRMOsD 密碼：2z4g

%%
% Builds VOC2007-style XML annotation files from a text file.
% Each line of the text file looks like:  000002.jpg dog 44 28 132 121
% i.e. image name, object class and bounding-box coordinates, separated by
% single spaces. An image with several objects has one line per object and
% those lines must follow each other directly, e.g. for two objects:
%   000002.jpg dog 44 28 132 121
%   000002.jpg car 50 27 140 110
% The box is given by its top-left and bottom-right corners.
% Author: 小咸魚_
% CSDN: http://blog.csdn.net/sinat_30071459
%%
clc;
clear;
% Adjust the four variables below.
imgpath='F:\face\';               % folder that holds the images
txtpath='F:\face\output.txt';     % annotation text file
xmlpath_new='F:\Annotations\';    % folder that receives the final XML files
foldername='VOC2007';             % text of the <folder> element

% Make sure the output locations exist before anything is written to them.
if ~exist('JPEGImages','dir')
    mkdir('JPEGImages');
end
if ~exist(xmlpath_new,'dir')
    mkdir(xmlpath_new);
end

fidin=fopen(txtpath,'r');
if fidin < 0
    error('Cannot open annotation file %s', txtpath);
end
lastname='begin';   % name of the image the current XML document belongs to

while ~feof(fidin)
    tline=fgetl(fidin);
    if ~ischar(tline) || isempty(strtrim(tline))
        continue;   % skip blank lines and the -1 returned at end of file
    end
    str = regexp(tline, ' ','split');
    filepath=[imgpath,str{1}];
    img=imread(filepath);
    [h,w,d]=size(img);

    % Show the image with the box drawn on it as a visual check.
    imshow(img);
    rectangle('Position',[str2double(str{3}),str2double(str{4}),str2double(str{5})-str2double(str{3}),str2double(str{6})-str2double(str{4})],'LineWidth',4,'EdgeColor','r');
    pause(0.1);

    if ~strcmp(str{1},lastname)
        % A new image starts here: copy it, flush the previous document and
        % create a fresh one with the per-image header elements.
        copyfile(filepath, 'JPEGImages');
        if exist('Createnode','var')
            xmlwrite(strrep(lastname,'.jpg','.xml'),Createnode);
        end

        Createnode=com.mathworks.xml.XMLUtils.createDocument('annotation');
        Root=Createnode.getDocumentElement;   % root node

        node=Createnode.createElement('folder');
        node.appendChild(Createnode.createTextNode(foldername));
        Root.appendChild(node);

        node=Createnode.createElement('filename');
        node.appendChild(Createnode.createTextNode(str{1}));
        Root.appendChild(node);

        source_node=Createnode.createElement('source');
        Root.appendChild(source_node);
        sourceFields={'database','My Database'; 'annotation','VOC2007'; 'image','flickr'; 'flickrid','NULL'};
        for k=1:size(sourceFields,1)
            node=Createnode.createElement(sourceFields{k,1});
            node.appendChild(Createnode.createTextNode(sourceFields{k,2}));
            source_node.appendChild(node);
        end

        owner_node=Createnode.createElement('owner');
        Root.appendChild(owner_node);
        ownerFields={'flickrid','NULL'; 'name','xiaoxianyu'};
        for k=1:size(ownerFields,1)
            node=Createnode.createElement(ownerFields{k,1});
            node.appendChild(Createnode.createTextNode(ownerFields{k,2}));
            owner_node.appendChild(node);
        end

        size_node=Createnode.createElement('size');
        Root.appendChild(size_node);
        sizeFields={'width',num2str(w); 'height',num2str(h); 'depth',num2str(d)};
        for k=1:size(sizeFields,1)
            node=Createnode.createElement(sizeFields{k,1});
            node.appendChild(Createnode.createTextNode(sizeFields{k,2}));
            size_node.appendChild(node);
        end

        node=Createnode.createElement('segmented');
        node.appendChild(Createnode.createTextNode('0'));
        Root.appendChild(node);

        lastname=str{1};
    end

    % Every line, whether it starts a new image or continues the previous
    % one, contributes exactly one <object> element.
    object_node=Createnode.createElement('object');
    Root.appendChild(object_node);
    objectFields={'name',str{2}; 'pose','Unspecified'; 'truncated','0'; 'difficult','0'};
    for k=1:size(objectFields,1)
        node=Createnode.createElement(objectFields{k,1});
        node.appendChild(Createnode.createTextNode(objectFields{k,2}));
        object_node.appendChild(node);
    end

    bndbox_node=Createnode.createElement('bndbox');
    object_node.appendChild(bndbox_node);
    boxFields={'xmin','ymin','xmax','ymax'};   % taken from columns 3..6 of the line
    for k=1:numel(boxFields)
        node=Createnode.createElement(boxFields{k});
        node.appendChild(Createnode.createTextNode(str{k+2}));
        bndbox_node.appendChild(node);
    end
end
fclose(fidin);

% Write the document of the last image.
if exist('Createnode','var')
    xmlwrite(strrep(lastname,'.jpg','.xml'),Createnode);
end

% Post-process: copy every .xml file in the current folder to xmlpath_new
% without its first line and with its indentation converted to tabs, then
% delete the file from the current folder.
% NOTE: this touches ALL .xml files in the current folder, not only the ones
% generated above.
file=dir(pwd);
for i=1:length(file)
    if length(file(i).name)>=4 && strcmp(file(i).name(end-3:end),'.xml')
        fold=fopen(file(i).name,'r');
        fnew=fopen([xmlpath_new file(i).name],'w');
        if fold < 0 || fnew < 0
            error('Cannot open %s for post-processing', file(i).name);
        end
        isFirstLine=true;
        while ~feof(fold)
            tline=fgetl(fold);
            if isFirstLine
                % Drop the first line (presumably the XML declaration that
                % xmlwrite emits).
                isFirstLine=false;
                continue;
            end
            % NOTE(review): the pattern literal was garbled in the published
            % listing; three spaces (one xmlwrite indent level) is the
            % reconstruction used here - confirm against your xmlwrite output.
            expression = '   ';
            replace=char(9);   % tab character
            newStr=regexprep(tline,expression,replace);
            fprintf(fnew,'%s\n',newStr);
        end
        fprintf('已處理%s\n',file(i).name);
        fclose(fold);
        fclose(fnew);
        delete(file(i).name);
    end
end

【python代碼】

#encoding=utf-8
"""Convert the annotation text file from the previous step into VOC XML files.

Only a single object class ("uav") and one object per input line are
handled; recognising several classes needs small changes to the code below.

Each input line is tab-separated:
    <image path> <xmin> <ymin> <xmax> <ymax>
"""
import sys
import os
import codecs


def build_annotation_xml(filename, width, height, depth, xmin, ymin, xmax, ymax):
    """Return the VOC annotation XML text for one image holding one object.

    filename            -- image file name written to <filename>
    width/height/depth  -- image dimensions written to <size>
    xmin/ymin/xmax/ymax -- bounding-box coordinates, as strings
    """
    parts = [
        '<annotation>\n',
        '\t<folder>' + 'UAV_data' + '</folder>\n',
        '\t<filename>' + filename + '</filename>\n',
        '\t<source>\n',
        '\t\t<database>The UAV autolanding</database>\n',
        '\t\t<annotation>UAV AutoLanding</annotation>\n',
        '\t\t<image>flickr</image>\n',
        '\t\t<flickrid>NULL</flickrid>\n',
        '\t</source>\n',
        '\t<owner>\n',
        '\t\t<flickrid>NULL</flickrid>\n',
        '\t\t<name>XuSenhai</name>\n',
        '\t</owner>\n',
        '\t<size>\n',
        '\t\t<width>' + str(width) + '</width>\n',
        '\t\t<height>' + str(height) + '</height>\n',
        '\t\t<depth>' + str(depth) + '</depth>\n',
        '\t</size>\n',
        '\t\t<segmented>0</segmented>\n',
        '\t<object>\n',
        '\t\t<name>uav</name>\n',
        '\t\t<pose>Unspecified</pose>\n',
        '\t\t<truncated>0</truncated>\n',
        '\t\t<difficult>0</difficult>\n',
        '\t\t<bndbox>\n',
        '\t\t\t<xmin>' + xmin + '</xmin>\n',
        '\t\t\t<ymin>' + ymin + '</ymin>\n',
        '\t\t\t<xmax>' + xmax + '</xmax>\n',
        '\t\t\t<ymax>' + ymax + '</ymax>\n',
        '\t\t</bndbox>\n',
        '\t</object>\n',
        '</annotation>',
    ]
    return ''.join(parts)


def main():
    """Read the annotation list, write one XML file per line plus train.txt."""
    # Imported here so build_annotation_xml() stays usable without OpenCV.
    import cv2

    root = r'F:\dataset\xml'
    with open(r'F:\dataset\pos-all.txt') as fp:
        uavinfo = fp.readlines()

    with open('train.txt', 'w') as fp2:
        for raw in uavinfo:
            line = raw.strip().split('\t')
            if len(line) < 5:
                continue  # blank or incomplete line: nothing to convert

            img = cv2.imread(line[0])
            if img is None:
                # cv2.imread returns None instead of raising on failure.
                sys.stderr.write('cannot read image: ' + line[0] + '\n')
                continue
            sp = img.shape
            height = sp[0]
            width = sp[1]
            depth = sp[2]

            info1 = line[0].split('\\')[-1]   # file name without directories
            info2 = info1.split('.')[0]       # file name without extension
            fp2.write(info2 + '\n')

            xml_path = os.path.join(root, info2 + '.xml')
            with codecs.open(xml_path, 'w', 'utf-8') as xml:
                xml.write(build_annotation_xml(info1, width, height, depth,
                                               line[1], line[2], line[3], line[4]))


if __name__ == '__main__':
    main()

第四步：數據集分割–matlab代碼

% Training needs four list files: test.txt (test set), train.txt (training
% set), val.txt (validation set) and trainval.txt (training + validation).
% Each file lists image names, one per line. In VOC2007 trainval is about
% 50% of the whole data set and test the other 50%; train is about 50% of
% trainval and val the other 50%. The script below can be used as a
% reference.
%%
% Creates trainval.txt, train.txt, test.txt and val.txt for a VOC2007-style
% data set from the XML annotation files that were already generated.
% trainval is 50% of the data set, test the remaining 50%; train is 50% of
% trainval, val the remaining 50%.
% Adjust the percentages to your data set; with few samples, test and val
% can be made smaller.
%%
% Adjust the four values below.
xmlfilepath='E:\Annotations';
txtsavepath='E:\ImageSets\Main\';
trainval_percent=0.5;   % share of the whole data set used for trainval; the rest is test
train_percent=0.5;      % share of trainval used for train; the rest is val
%%
% List only the XML files. This does not depend on '.' and '..' being the
% first two directory entries and ignores any other files in the folder.
xmlfile=dir(fullfile(xmlfilepath,'*.xml'));
numOfxml=length(xmlfile);   % size of the whole data set

trainval=sort(randperm(numOfxml,floor(numOfxml*trainval_percent)));
test=sort(setdiff(1:numOfxml,trainval));
trainvalsize=length(trainval);   % size of trainval
train=sort(trainval(randperm(trainvalsize,floor(trainvalsize*train_percent))));
val=sort(setdiff(trainval,train));

ftrainval=fopen([txtsavepath 'trainval.txt'],'w');
ftest=fopen([txtsavepath 'test.txt'],'w');
ftrain=fopen([txtsavepath 'train.txt'],'w');
fval=fopen([txtsavepath 'val.txt'],'w');
if any([ftrainval ftest ftrain fval] < 0)
    error('Cannot create the list files in %s', txtsavepath);
end

for i=1:numOfxml
    name=xmlfile(i).name(1:end-4);   % file name without the '.xml' extension
    if ismember(i,trainval)
        fprintf(ftrainval,'%s\n',name);
        if ismember(i,train)
            fprintf(ftrain,'%s\n',name);
        else
            fprintf(fval,'%s\n',name);
        end
    else
        fprintf(ftest,'%s\n',name);
    end
end

fclose(ftrainval);
fclose(ftrain);
fclose(fval);
fclose(ftest);

第五步：將得到的文件放置正確路徑

1. 將訓練圖片放置于 faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages 中 2. 將得到的xml文件放置于 faster-rcnn/data/VOCdevkit2007/VOC2007/Annotations 中 3. 將得到的test.txt，train.txt，val.txt，trainval.txt 放置于 faster-rcnn/data/VOCdevkit2007/VOC2007/ImageSets/Main 中（注意：ImageSets 與 Annotations、JPEGImages 同級，不在 Annotations 目錄下）

總結

以上是生活随笔為你收集整理的XML--视频--人脸VOC的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： 1、用Anaconda配置Windows
下一篇：海康摄像头