
Portrait Matting: One-Click Video Background Removal with Deep Learning

Published: 2023/12/20

Introduction

1. Matting has a wide range of applications: ID photos, body reshaping, special processing of human regions, and Bilibili's effect of rendering on-screen comments behind people. The key to all of these is a segmentation algorithm with both high accuracy and high performance. RobustVideoMatting (RVM) is ByteDance's video portrait matting algorithm, designed specifically for robust human video matting. Unlike existing networks that process each frame as an independent image, RVM uses a recurrent neural network, giving it temporal memory when processing a video stream. RVM can perform real-time, high-definition portrait matting on arbitrary videos.
2. For the algorithm details and training steps, see the official repository: https://github.com/PeterL1n/RobustVideoMatting. This article only covers C++ inference and deployment of the model.
3. The development environment is Windows 10 with an RTX 3080 GPU, CUDA 11.2, cuDNN 8.1, OpenCV 4.5, onnxruntime, and Visual Studio 2019 as the IDE.

I. Model and Dependencies

1. The official repo publishes the model in many formats, including MNN, NCNN, and ONNX. This article uses the ONNX model, which can be downloaded directly from the officially published link (gym7).

2. For onnxruntime, download the official prebuilt release and it is ready to use.

3. For GPU inference, CUDA and cuDNN must be downloaded and installed; detailed installation guides are widely available online.
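As a configuration sketch of what GPU inference would look like in the session setup (this assumes the GPU build of onnxruntime; the provider-options struct has changed between onnxruntime versions, so check it against the headers you downloaded):

```cpp
#include <onnxruntime_cxx_api.h>

// Sketch: append the CUDA execution provider before creating the session.
// If the provider cannot be loaded at runtime, onnxruntime falls back to CPU.
Ort::SessionOptions make_session_options(bool use_gpu)
{
    Ort::SessionOptions options;
    options.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
    if (use_gpu)
    {
        OrtCUDAProviderOptions cuda_options{};
        cuda_options.device_id = 0;  // first GPU
        options.AppendExecutionProvider_CUDA(cuda_options);
    }
    return options;
}
```

These options would replace the default `SessionOptions` used in the `RobustVideoMatting` constructor below.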

II. Code

1. Inference code

```cpp
#include <cstring>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>

using namespace cv;
using namespace std;
using namespace Ort;

typedef struct MattingContentType
{
    Mat fgr_mat;    // predicted foreground
    Mat pha_mat;    // alpha matte
    Mat merge_mat;  // foreground composited over the replacement background
    bool flag;
    MattingContentType() : flag(false) {};
} MattingContent;

class RobustVideoMatting
{
public:
    RobustVideoMatting(string model_path);
    void detect(const Mat& mat, MattingContent& content, float downsample_ratio);

private:
    Session* session_;
    Env env = Env(ORT_LOGGING_LEVEL_ERROR, "robustvideomatting");
    SessionOptions sessionOptions = SessionOptions();
    unsigned int num_inputs = 6;
    vector<const char*> input_node_names = { "src", "r1i", "r2i", "r3i", "r4i", "downsample_ratio" };
    vector<vector<int64_t>> dynamic_input_node_dims = {
        { 1, 3, 1280, 720 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 },
        { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1 }
    };
    unsigned int num_outputs = 6;
    vector<const char*> output_node_names = { "fgr", "pha", "r1o", "r2o", "r3o", "r4o" };
    vector<float> dynamic_src_value_handler;
    vector<float> dynamic_r1i_value_handler = { 0.0f };
    vector<float> dynamic_r2i_value_handler = { 0.0f };
    vector<float> dynamic_r3i_value_handler = { 0.0f };
    vector<float> dynamic_r4i_value_handler = { 0.0f };
    vector<float> dynamic_dsr_value_handler = { 0.25f };
    int64_t value_size_of(const std::vector<int64_t>& dims);
    bool context_is_update = false;
    void normalize_(Mat img, vector<float>& output);
    vector<Ort::Value> transform(const Mat& mat);
    void generate_matting(vector<Ort::Value>& output_tensors, MattingContent& content);
    void update_context(vector<Ort::Value>& output_tensors);
};

RobustVideoMatting::RobustVideoMatting(string model_path)
{
    wstring widestr = wstring(model_path.begin(), model_path.end());
    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
    session_ = new Session(env, widestr.c_str(), sessionOptions);
}

void RobustVideoMatting::normalize_(Mat img, vector<float>& output)
{
    int row = img.rows;
    int col = img.cols;
    for (int c = 0; c < 3; c++)
    {
        for (int i = 0; i < row; i++)
        {
            for (int j = 0; j < col; j++)
            {
                // interleaved BGR bytes -> planar RGB floats in [0, 1]
                float pix = img.ptr<uchar>(i)[j * 3 + 2 - c];
                output[c * row * col + i * col + j] = pix / 255.0;
            }
        }
    }
}

int64_t RobustVideoMatting::value_size_of(const std::vector<int64_t>& dims)
{
    if (dims.empty()) return 0;
    int64_t value_size = 1;
    for (const auto& size : dims) value_size *= size;
    return value_size;
}

vector<Ort::Value> RobustVideoMatting::transform(const Mat& mat)
{
    Mat src = mat.clone();
    const unsigned int img_height = mat.rows;
    const unsigned int img_width = mat.cols;

    // src has dynamic shape: update H x W to the current frame size.
    vector<int64_t>& src_dims = dynamic_input_node_dims.at(0);
    src_dims.at(2) = img_height;
    src_dims.at(3) = img_width;
    std::vector<int64_t>& r1i_dims = dynamic_input_node_dims.at(1);
    std::vector<int64_t>& r2i_dims = dynamic_input_node_dims.at(2);
    std::vector<int64_t>& r3i_dims = dynamic_input_node_dims.at(3);
    std::vector<int64_t>& r4i_dims = dynamic_input_node_dims.at(4);
    std::vector<int64_t>& dsr_dims = dynamic_input_node_dims.at(5);

    int64_t src_value_size = this->value_size_of(src_dims);
    int64_t r1i_value_size = this->value_size_of(r1i_dims);
    int64_t r2i_value_size = this->value_size_of(r2i_dims);
    int64_t r3i_value_size = this->value_size_of(r3i_dims);
    int64_t r4i_value_size = this->value_size_of(r4i_dims);
    int64_t dsr_value_size = this->value_size_of(dsr_dims);

    dynamic_src_value_handler.resize(src_value_size);
    this->normalize_(src, dynamic_src_value_handler);

    std::vector<Ort::Value> input_tensors;
    auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    input_tensors.push_back(Value::CreateTensor<float>(allocator_info, dynamic_src_value_handler.data(),
        dynamic_src_value_handler.size(), src_dims.data(), src_dims.size()));
    input_tensors.push_back(Value::CreateTensor<float>(allocator_info, dynamic_r1i_value_handler.data(),
        r1i_value_size, r1i_dims.data(), r1i_dims.size()));
    input_tensors.push_back(Value::CreateTensor<float>(allocator_info, dynamic_r2i_value_handler.data(),
        r2i_value_size, r2i_dims.data(), r2i_dims.size()));
    input_tensors.push_back(Value::CreateTensor<float>(allocator_info, dynamic_r3i_value_handler.data(),
        r3i_value_size, r3i_dims.data(), r3i_dims.size()));
    input_tensors.push_back(Value::CreateTensor<float>(allocator_info, dynamic_r4i_value_handler.data(),
        r4i_value_size, r4i_dims.data(), r4i_dims.size()));
    input_tensors.push_back(Value::CreateTensor<float>(allocator_info, dynamic_dsr_value_handler.data(),
        dsr_value_size, dsr_dims.data(), dsr_dims.size()));
    return input_tensors;
}

void RobustVideoMatting::generate_matting(std::vector<Ort::Value>& output_tensors, MattingContent& content)
{
    Ort::Value& fgr = output_tensors.at(0);
    Ort::Value& pha = output_tensors.at(1);
    auto fgr_dims = fgr.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
    auto pha_dims = pha.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
    const unsigned int height = fgr_dims.at(2);
    const unsigned int width = fgr_dims.at(3);
    const unsigned int channel_step = height * width;

    // fgr is planar RGB in [0, 1]; pha is a single-channel alpha matte.
    float* fgr_ptr = fgr.GetTensorMutableData<float>();
    float* pha_ptr = pha.GetTensorMutableData<float>();
    Mat rmat(height, width, CV_32FC1, fgr_ptr);
    Mat gmat(height, width, CV_32FC1, fgr_ptr + channel_step);
    Mat bmat(height, width, CV_32FC1, fgr_ptr + 2 * channel_step);
    Mat pmat(height, width, CV_32FC1, pha_ptr);
    rmat *= 255.;
    bmat *= 255.;
    gmat *= 255.;

    // Composite the foreground over a fixed background (B=153, G=255, R=120).
    Mat rest = 1. - pmat;
    Mat mbmat = bmat.mul(pmat) + rest * 153.;
    Mat mgmat = gmat.mul(pmat) + rest * 255.;
    Mat mrmat = rmat.mul(pmat) + rest * 120.;

    std::vector<Mat> fgr_channel_mats, merge_channel_mats;
    fgr_channel_mats.push_back(bmat);
    fgr_channel_mats.push_back(gmat);
    fgr_channel_mats.push_back(rmat);
    merge_channel_mats.push_back(mbmat);
    merge_channel_mats.push_back(mgmat);
    merge_channel_mats.push_back(mrmat);

    content.pha_mat = pmat;
    merge(fgr_channel_mats, content.fgr_mat);
    merge(merge_channel_mats, content.merge_mat);
    content.fgr_mat.convertTo(content.fgr_mat, CV_8UC3);
    content.merge_mat.convertTo(content.merge_mat, CV_8UC3);
    content.flag = true;
}

void RobustVideoMatting::update_context(std::vector<Ort::Value>& output_tensors)
{
    Ort::Value& r1o = output_tensors.at(2);
    Ort::Value& r2o = output_tensors.at(3);
    Ort::Value& r3o = output_tensors.at(4);
    Ort::Value& r4o = output_tensors.at(5);
    auto r1o_dims = r1o.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
    auto r2o_dims = r2o.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
    auto r3o_dims = r3o.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
    auto r4o_dims = r4o.GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
    // The next frame's recurrent inputs take the shapes of this frame's outputs.
    dynamic_input_node_dims.at(1) = r1o_dims;
    dynamic_input_node_dims.at(2) = r2o_dims;
    dynamic_input_node_dims.at(3) = r3o_dims;
    dynamic_input_node_dims.at(4) = r4o_dims;
    int64_t new_r1i_value_size = this->value_size_of(r1o_dims);
    int64_t new_r2i_value_size = this->value_size_of(r2o_dims);
    int64_t new_r3i_value_size = this->value_size_of(r3o_dims);
    int64_t new_r4i_value_size = this->value_size_of(r4o_dims);
    dynamic_r1i_value_handler.resize(new_r1i_value_size);
    dynamic_r2i_value_handler.resize(new_r2i_value_size);
    dynamic_r3i_value_handler.resize(new_r3i_value_size);
    dynamic_r4i_value_handler.resize(new_r4i_value_size);
    float* new_r1i_value_ptr = r1o.GetTensorMutableData<float>();
    float* new_r2i_value_ptr = r2o.GetTensorMutableData<float>();
    float* new_r3i_value_ptr = r3o.GetTensorMutableData<float>();
    float* new_r4i_value_ptr = r4o.GetTensorMutableData<float>();
    std::memcpy(dynamic_r1i_value_handler.data(), new_r1i_value_ptr, new_r1i_value_size * sizeof(float));
    std::memcpy(dynamic_r2i_value_handler.data(), new_r2i_value_ptr, new_r2i_value_size * sizeof(float));
    std::memcpy(dynamic_r3i_value_handler.data(), new_r3i_value_ptr, new_r3i_value_size * sizeof(float));
    std::memcpy(dynamic_r4i_value_handler.data(), new_r4i_value_ptr, new_r4i_value_size * sizeof(float));
    context_is_update = true;
}

void RobustVideoMatting::detect(const Mat& mat, MattingContent& content, float downsample_ratio)
{
    if (mat.empty()) return;
    dynamic_dsr_value_handler.at(0) = downsample_ratio;
    std::vector<Ort::Value> input_tensors = this->transform(mat);
    auto output_tensors = session_->Run(Ort::RunOptions{ nullptr }, input_node_names.data(),
        input_tensors.data(), num_inputs, output_node_names.data(), num_outputs);
    this->generate_matting(output_tensors, content);
    context_is_update = false;
    this->update_context(output_tensors);
}
```

2. Matting a portrait in a single image:

```cpp
void detect_image(const cv::Mat& cv_src, string model_path)
{
    const float downsample_ratio = 0.2f;
    RobustVideoMatting rvm(model_path);
    MattingContent content;
    rvm.detect(cv_src, content, downsample_ratio);

    namedWindow("src", WINDOW_NORMAL);
    imshow("src", cv_src);
    namedWindow("matting", WINDOW_NORMAL);
    // pha_mat is CV_32F in [0, 1]; imshow scales float images itself,
    // so it is displayed as-is rather than multiplied by 255.
    imshow("matting", content.pha_mat);
    namedWindow("merge", WINDOW_NORMAL);
    imshow("merge", content.merge_mat);
    waitKey(0);
}
```

Result:


3. Matting portraits in a video

```cpp
void detect_video(const std::string video_path, string model_path)
{
    const float downsample_ratio = 0.25f;
    RobustVideoMatting rvm(model_path);

    cv::VideoCapture video_capture(video_path);
    if (!video_capture.isOpened())
    {
        return;  // check before querying properties
    }
    const unsigned int width = video_capture.get(cv::CAP_PROP_FRAME_WIDTH);
    const unsigned int height = video_capture.get(cv::CAP_PROP_FRAME_HEIGHT);
    const unsigned int frame_count = video_capture.get(cv::CAP_PROP_FRAME_COUNT);

    cv::Mat cv_src;
    while (video_capture.read(cv_src))
    {
        MattingContent content;
        rvm.detect(cv_src, content, downsample_ratio);
        namedWindow("src", WINDOW_NORMAL);
        imshow("src", cv_src);
        namedWindow("matting", WINDOW_NORMAL);
        imshow("matting", content.pha_mat);
        namedWindow("merge", WINDOW_NORMAL);
        imshow("merge", content.merge_mat);
        waitKey(10);
    }
}
```

Result:




III. Source Code

1. Source code: https://download.csdn.net/download/matt45m/86821062
2. After downloading, unzip the archive and open the project with VS2019. The images and video folders hold the test images and videos, lib contains the dependency libraries, and include holds the dependency headers.

3. Configure the include and lib paths.

4. Add the libraries: add every file with a .lib suffix in the lib directory to the linker's additional dependencies.

5. Run configuration.

Summary

That concludes this walkthrough of portrait matting and one-click video background removal with deep learning; hopefully it helps you solve the problems you are facing.
