日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 人文社科 > 生活经验 > 内容正文

生活经验

TensorRT Samples: GoogleNet

發布時間:2023/11/27 生活经验 48 豆豆
生活随笔 收集整理的這篇文章主要介紹了 TensorRT Samples: GoogleNet 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
關(guān)于TensorRT的介紹可以參考:? http://blog.csdn.net/fengbingchun/article/details/78469551 ?

以下是參考TensorRT 2.1.2中的sampleGoogleNet.cpp文件改寫的測試代碼,文件(googlenet.cpp)內容如下:

#include <iostream>
#include <tuple>
#include <string>
#include <vector>
#include <algorithm>#include <cuda_runtime_api.h>
#include <NvInfer.h>
#include <NvCaffeParser.h>
#include "common.hpp"

// reference: TensorRT-2.1.2/samples/sampleGoogleNet/sampleGoogleNet.cpp
// (original comment pointed at sampleMNIST, which is the wrong sample directory)

namespace {

// Tuple layout: batch size, timing iterations, input blob name, output blob name,
// deploy (prototxt) file, model (caffemodel) file.
typedef std::tuple<int, int, std::string, std::string, std::string, std::string> DATA_INFO;

// Accumulates per-layer execution times reported by TensorRT and prints the
// average over the configured number of timing iterations.
struct Profiler : public nvinfer1::IProfiler {
	typedef std::pair<std::string, float> Record;
	std::vector<Record> mProfile;  // (layer name, accumulated milliseconds)
	int timing_iterations {1};     // divisor used when printing averages

	void setTimeIterations(int iteration)
	{
		timing_iterations = iteration;
	}

	// Callback invoked by TensorRT after each layer executes; accumulates the
	// elapsed milliseconds under the layer's name across all iterations.
	virtual void reportLayerTime(const char* layerName, float ms)
	{
		auto record = std::find_if(mProfile.begin(), mProfile.end(),
			[&](const Record& r) { return r.first == layerName; });
		if (record == mProfile.end())
			mProfile.push_back(std::make_pair(layerName, ms));
		else
			record->second += ms;
	}

	// Prints the average time of every layer plus the total across all layers.
	void printLayerTimes()
	{
		float totalTime = 0;
		for (size_t i = 0; i < mProfile.size(); ++i) {
			fprintf(stdout, "%s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / timing_iterations);
			totalTime += mProfile[i].second;
		}
		fprintf(stdout, "Time over all layers: %4.3f\n", totalTime / timing_iterations);
	}
};

// Parses a Caffe deploy/model pair, builds a TensorRT (GIE) engine from it and
// serializes the engine into gieModelStream.
//   deployFile:     path to the Caffe prototxt
//   modelFile:      path to the Caffe caffemodel
//   outputs:        names of the tensors to mark as network outputs
//   maxBatchSize:   must be at least as large as the batch used at runtime
//   gieModelStream: receives the serialized engine; caller must destroy() it
//   logger:         TensorRT logger
// FIX: logger is now passed by reference — the original passed the polymorphic
// Logger object by value, needlessly copying it (and risking slicing if Logger
// were ever subclassed further).
int caffeToGIEModel(const std::string& deployFile, const std::string& modelFile,
	const std::vector<std::string>& outputs, unsigned int maxBatchSize,
	nvinfer1::IHostMemory*& gieModelStream, Logger& logger)
{
	// create API root class - must span the lifetime of the engine usage
	nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
	nvinfer1::INetworkDefinition* network = builder->createNetwork();

	// parse the caffe model to populate the network, then set the outputs
	nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
	bool useFp16 = builder->platformHasFastFp16();
	// create a 16-bit model if it's natively supported
	nvinfer1::DataType modelDataType = useFp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
	const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor =
		parser->parse(deployFile.c_str(), modelFile.c_str(), *network, modelDataType);
	CHECK(blobNameToTensor != nullptr);

	// the caffe file has no notion of outputs, so we need to manually say
	// which tensors the engine should generate
	for (auto& s : outputs)
		network->markOutput(*blobNameToTensor->find(s.c_str()));

	// build the engine
	builder->setMaxBatchSize(maxBatchSize);
	builder->setMaxWorkspaceSize(16 << 20);
	// set up the network for paired-fp16 format if available
	if (useFp16)
		builder->setHalf2Mode(true);

	nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
	CHECK(engine != nullptr);

	// we don't need the network any more, and we can destroy the parser
	network->destroy();
	parser->destroy();

	// serialize the engine, then close everything down
	gieModelStream = engine->serialize();
	engine->destroy();
	builder->destroy();
	nvcaffeparser1::shutdownProtobufLibrary();
	return 0;
}

// Executes the engine std::get<1>(info) times on a zeroed input buffer so the
// supplied profiler can record per-layer timings. Returns 0 on success.
int timeInference(nvinfer1::ICudaEngine* engine, const DATA_INFO& info, Profiler* profiler)
{
	// input and output buffer pointers that we pass to the engine - the engine
	// requires exactly ICudaEngine::getNbBindings() of these, but in this case
	// we know that there is exactly one input and one output
	CHECK(engine->getNbBindings() == 2);
	void* buffers[2];

	// in order to bind the buffers, we need to know the names of the input and
	// output tensors; indices are guaranteed to be < getNbBindings()
	int inputIndex = engine->getBindingIndex(std::get<2>(info).c_str());
	int outputIndex = engine->getBindingIndex(std::get<3>(info).c_str());

	// allocate GPU buffers sized for the configured batch
	nvinfer1::DimsCHW inputDims = static_cast<nvinfer1::DimsCHW&&>(engine->getBindingDimensions(inputIndex));
	nvinfer1::DimsCHW outputDims = static_cast<nvinfer1::DimsCHW&&>(engine->getBindingDimensions(outputIndex));
	size_t inputSize = std::get<0>(info) * inputDims.c() * inputDims.h() * inputDims.w() * sizeof(float);
	size_t outputSize = std::get<0>(info) * outputDims.c() * outputDims.h() * outputDims.w() * sizeof(float);
	// FIX: verify CUDA allocations instead of silently running with null buffers
	CHECK(cudaMalloc(&buffers[inputIndex], inputSize) == cudaSuccess);
	CHECK(cudaMalloc(&buffers[outputIndex], outputSize) == cudaSuccess);

	nvinfer1::IExecutionContext* context = engine->createExecutionContext();
	context->setProfiler(profiler);

	// zero the input buffer: only timing is measured here, data is irrelevant
	CHECK(cudaMemset(buffers[inputIndex], 0, inputSize) == cudaSuccess);

	for (int i = 0; i < std::get<1>(info); ++i)
		context->execute(std::get<0>(info), buffers);

	// release the context and buffers
	context->destroy();
	cudaFree(buffers[inputIndex]);
	cudaFree(buffers[outputIndex]);
	return 0;
}

} // namespace

// Builds a GoogleNet engine from the Caffe files under models/, runs timed
// inference with null input data and prints the per-layer timing report.
int test_googlenet()
{
	fprintf(stdout, "Building and running a GPU inference engine for GoogleNet, N=4...\n");
	// stuff we know about the network and the caffe input/output blobs
	DATA_INFO info(4, 1000, "data", "prob", "models/googlenet.prototxt", "models/googlenet.caffemodel");
	Logger logger;

	// parse the caffe model and serialize it into a host-memory stream
	nvinfer1::IHostMemory* gieModelStream{ nullptr };
	caffeToGIEModel(std::get<4>(info), std::get<5>(info), std::vector<std::string>{std::get<3>(info)}, std::get<0>(info), gieModelStream, logger);

	// create an engine
	nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger);
	nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream->data(), gieModelStream->size(), nullptr);
	// FIX: the serialized stream is no longer needed once the engine has been
	// deserialized; the original leaked this IHostMemory object
	gieModelStream->destroy();

	fprintf(stdout, "Bindings after deserializing:\n");
	for (int bi = 0; bi < engine->getNbBindings(); bi++) {
		if (engine->bindingIsInput(bi) == true)
			fprintf(stdout, "Binding %d (%s): Input.\n", bi, engine->getBindingName(bi));
		else
			fprintf(stdout, "Binding %d (%s): Output.\n", bi, engine->getBindingName(bi));
	}

	Profiler profiler;
	profiler.setTimeIterations(std::get<1>(info));
	// run inference with null data to time network performance
	timeInference(engine, info, &profiler);

	engine->destroy();
	infer->destroy();

	profiler.printLayerTimes();
	fprintf(stdout, "Done.\n");
	return 0;
}
執(zhí)行結(jié)果如下:

測試代碼編譯步驟如下(ReadMe.txt):
在Linux下通過CMake編譯TensorRT_Test中的測試代碼步驟:
1. 將終端定位到CUDA_Test/prj/linux_tensorrt_cmake,依次執行如下命令:
$ mkdir build
$ cd build
$ cmake ..
$ make (生成TensorRT_Test執行文件)
$ ln -s ../../../test_data/models ./ (將models目錄軟鏈接到build目錄下)
$ ln -s ../../../test_data/images ./ (將images目錄軟鏈接到build目錄下)
$ ./TensorRT_Test
2. 對于有需要用OpenCV參與的讀取圖像的操作,需要先將對應文件中的圖像路徑修改為Linux支持的路徑格式

GitHub:?https://github.com/fengbingchun/CUDA_Test


總結(jié)

以上是生活随笔為你收集整理的TensorRT Samples: GoogleNet的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。