如何解决在 TensorRT 中读取 pb 文件和在 C++ 中读取 NetFromTensorflow 时出错
我有 Python 代码以及 TensorRT 和 Docker 容器 20.03,它有 CUDA 10.2 和 TensorRT 7.0.0
from __future__ import print_function
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imutils.paths import list_images
from keras import backend as K
from keras.callbacks import CSVLogger
from keras.layers import *
from keras.models import Model
from keras.optimizers import Adam
from scipy.io import loadmat
from scipy.misc import imread
from skimage.io import imsave
from skimage.transform import resize
from scipy.io import loadmat
import cv2
import tensorflow as tf
Ny=1024  # image side length in pixels; the U-Net is built for Ny x Ny single-channel inputs
target_size = (Ny,Ny)  # (height, width) pair -- NOTE(review): defined but unused in this snippet
warnings.filterwarnings('ignore')
LR= 1e-4  # Adam learning rate used in get_unet's compile step
E,BS = 2,4  # epochs and batch size passed to model.fit
def get_unet(img_rows, img_cols):
    """Build and compile a U-Net for (img_rows, img_cols, 1) grayscale inputs.

    Returns a compiled Keras Model (Adam optimizer, binary cross-entropy).

    Reconstruction notes: the pasted code used 'Batchnormalization' (wrong
    casing -- the Keras layer is BatchNormalization) and dropped the
    kernel_size/activation arguments from most Conv2D calls, so it could not
    run; both are restored here as a standard 3x3-ReLU U-Net.
    """
    inputs = Input((img_rows, img_cols, 1))

    # Encoder: two conv+BN per level, downsampling by 2x2 max-pool.
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = BatchNormalization()(conv1)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    conv1 = BatchNormalization()(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = BatchNormalization()(conv2)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    conv2 = BatchNormalization()(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = BatchNormalization()(conv3)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    conv3 = BatchNormalization()(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = BatchNormalization()(conv4)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    conv4 = BatchNormalization()(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    # Bottleneck, with dropout for regularization.
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = BatchNormalization()(conv5)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)
    conv5 = BatchNormalization()(conv5)
    conv5 = Dropout(0.5)(conv5)

    # Decoder: upsample, concatenate the encoder skip connection on the
    # channel axis (axis=3, NHWC), then convolve.
    # NOTE(review): UpSampling2D is what exports as the ONNX Resize/Upsample
    # node that TensorRT 7.0 rejects; Conv2DTranspose is a common workaround.
    up6 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=3)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)
    up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=3)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)
    up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=3)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)
    up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=3)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)
    conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

    # Simplified variant from the question (kept for reference):
    # def get_unet(img_rows, img_cols):
    #     inputs = Input((img_rows, 1))
    #     conv9 = inputs
    #     conv10 = Conv2D(1, activation='sigmoid')(conv9)

    model = Model(inputs=[inputs], outputs=[conv10])
    model.compile(optimizer=Adam(lr=LR), loss='binary_crossentropy',
                  metrics=['binary_crossentropy'])
    return model
# Build 1-sample train/test sets from a single grayscale image scaled to [0, 1].
# NOTE(review): the pasted array definitions were truncated by the scrape
# (e.g. "np.zeros([1,dtype=float)"); reconstructed here as (batch, Ny, Ny, 1)
# tensors so they match the U-Net's (img_rows, img_cols, 1) input.
train_images = np.zeros([1, Ny, Ny, 1], dtype=float)
annot_train = np.zeros([1, Ny, Ny, 1], dtype=float)
test_images = np.zeros([1, Ny, Ny, 1], dtype=float)
annot_test = np.zeros([1, Ny, Ny, 1], dtype=float)
img = cv2.imread('owlResized.bmp', 0) / 255.0    # grayscale read, normalized
label = cv2.imread('owlResized.bmp', 0) / 255.0  # same image used as its own label
train_images[0, :, :, 0], annot_train[0, :, :, 0] = img, label
test_images[0, :, :, 0], annot_test[0, :, :, 0] = img, label
print("finished reading")
C = np.concatenate([annot_test, annot_train])
I = np.concatenate([test_images, train_images])
unet = get_unet(Ny, Ny)
outName = "owlSimple"  # was undefined in the paste; required by the filepath template below
filepath = "W-E_{epoch:02d}-L_{loss:.6f}" + outName + ".h5"
history = unet.fit(I, C, verbose=2, epochs=E, batch_size=BS, validation_split=0.1)
# Save as a TensorFlow SavedModel directory (contains saved_model.pb). Note this
# is NOT a frozen GraphDef -- which is why cv::dnn::readNetFromTensorflow
# fails to parse it later in the question.
unet.save('owlSimple', overwrite=True, include_optimizer=False, save_format='tf')
然后我运行:
python3 -m tf2onnx.convert --opset 12 --saved-model ./owlSimple --output owlSimple12.onnx
我还尝试了 9、10、11 的 opset。他们似乎都成功转换了
当我尝试在 TensorRT 中运行 opsets 11 和 12 时,我会得到如下错误:
ERROR: ModelImporter.cpp:92 In function parseGraph:
[8] Assertion Failed: convertOnnxWeights(initializer,&weights,ctx)
ERROR: Could not parse the model.
setBindingDimensions
Segmentation fault (core dumped)
使用 opset 10,我得到:
While parsing node number 45 [Resize]:
ERROR: builtin_op_importers.cpp:2412 In function importResize:
[8] Assertion Failed: scales.is_weights() && "Resize scales must be an initializer!"
ERROR: Could not parse the model.
我得到了 Opset9
While parsing node number 45 [Upsample]:
ERROR: builtin_op_importers.cpp:3240 In function importUpsample:
[8] Assertion Failed: scales_input.is_weights()
ERROR: Could not parse the model.
即使我这样做了,我也遇到了这些错误
wget https://raw.githubusercontent.com/rmccorm4/tensorrt-utils/master/OSS/build_OSS.sh
source build_OSS.sh
如果我用下面的代码替换 U-Net,把网络进一步简化:
inputs = Input((img_rows,1))
conv9=inputs
conv10 = Conv2D(1,activation='sigmoid')(conv9)
如上面注释掉的代码所示,然后在 TensorRT 中运行各种操作集时出现这些错误:
input_2:0: dynamic input is missing dimensions in profile 0.
Network validation Failed.
Segmentation fault (core dumped)
我使用的 TensorRT C++ 是(它基于来自 https://www.learnopencv.com/how-to-run-inference-using-tensorrt-c-api/ 的代码):
#include <iostream>
#include <fstream>
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <memory>
#include <NvOnnxParser.h>
#include <vector>
#include <cuda_runtime_api.h>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudawarping.hpp>
#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>
#include <algorithm>
#include <numeric>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/cudaimgproc.hpp>
using namespace cv;
// utilities ----------------------------------------------------------------------------------------------------------
// class to log errors,warnings,and other information during the build and inference phases
// TensorRT logger: the builder/runtime invoke log() for every diagnostic
// message; this implementation prints only error-level messages (user and
// internal) to stdout and silently drops warnings/info/verbose output.
class Logger : public nvinfer1::ILogger
{
public:
// Called by TensorRT with a severity level and a null-terminated message.
void log(Severity severity,const char* msg) override {
// remove this 'if' if you need more logged info
if ((severity == Severity::kERROR) || (severity == Severity::kINTERNAL_ERROR)) {
std::cout << msg << "\n";
}
}
} gLogger;  // single global instance handed to createInferBuilder/createParser
// destroy TensorRT objects if something goes wrong
// Custom deleter for TensorRT objects: pre-8.x TensorRT types are released
// through their destroy() method rather than operator delete, so wrapping
// them in std::unique_ptr requires this deleter.
struct TRTDestroy
{
    template <class T>
    void operator()(T* ptr) const
    {
        if (ptr != nullptr)
        {
            ptr->destroy();
        }
    }
};
// Owning smart-pointer alias for any TensorRT interface type.
template <class T>
using TRTUniquePtr = std::unique_ptr<T,TRTDestroy>;
// calculate size of tensor
// Return the total number of elements of a tensor with the given dimensions.
// NOTE(review): dynamic dimensions are -1 in TensorRT; callers must resolve
// them (e.g. via setBindingDimensions) before using this as an allocation
// size, otherwise the product underflows to a huge size_t.
size_t getSizeByDim(const nvinfer1::Dims& dims)
{
    size_t size = 1;
    // dims.nbDims is a signed int; use a matching signed index to avoid the
    // signed/unsigned comparison in the original loop.
    for (int i = 0; i < dims.nbDims; ++i)
    {
        size *= static_cast<size_t>(dims.d[i]);
    }
    return size;
}
// get classes names
// Load class names, one per line, from the given file. On a read failure an
// error is printed to stderr and an empty vector is returned.
std::vector<std::string> getClassNames(const std::string& imagenet_classes)
{
    std::vector<std::string> names;
    std::ifstream in(imagenet_classes);
    if (!in.good())
    {
        std::cerr << "ERROR: can't read file with classes names.\n";
        return names;
    }
    for (std::string line; std::getline(in, line);)
    {
        names.push_back(line);
    }
    return names;
}
// preprocessing stage ------------------------------------------------------------------------------------------------
// Read an image, normalize it to float [0,1] on the GPU, resize it to the
// network input size, and write it channel-planar into the device buffer
// gpu_input. dims is the resolved input-binding shape; for this explicit-batch
// Keras export it is NHWC (N,H,W,C) -- TODO confirm against the engine.
void preprocessImage(const std::string& image_path,float* gpu_input,const nvinfer1::Dims& dims)
{
    // read input image as single-channel grayscale: the model takes 1 channel,
    // and the original color read would make the CV_32FC1 convertTo mismatch
    cv::Mat frame = cv::imread(image_path, cv::IMREAD_GRAYSCALE);
    if (frame.empty())
    {
        std::cerr << "Input image " << image_path << " load Failed\n";
        return;
    }
    cv::cuda::GpuMat gpu_frame;
    gpu_frame.upload(frame);
    const int channels = 1;
    // NHWC layout: d[1] = height, d[2] = width (the paste read d[0]/d[1],
    // which for an explicit-batch engine are batch/height)
    auto input_height = dims.d[1];
    auto input_width = dims.d[2];
    auto input_size = cv::Size(input_width, input_height);
    // actually resize to the network input (the original assigned without resizing)
    cv::cuda::GpuMat resized;
    cv::cuda::resize(gpu_frame, resized, input_size, 0, 0, cv::INTER_LINEAR);
    // normalize to [0,1] float
    cv::cuda::GpuMat flt_image;
    resized.convertTo(flt_image, CV_32FC1, 1.f / 255.f);
    // Wrap the destination device buffer so cv::cuda::split writes straight
    // into gpu_input; the GpuMat constructor's type argument (CV_32FC1) was
    // missing in the paste, which made it ill-formed.
    std::vector<cv::cuda::GpuMat> chw;
    for (int i = 0; i < channels; ++i)
    {
        chw.emplace_back(cv::cuda::GpuMat(input_size, CV_32FC1, gpu_input + i * input_width * input_height));
    }
    cv::cuda::split(flt_image, chw);
}
// post-processing stage ----------------------------------------------------------------------------------------------
// Copy the network output from the device to the host, reshape it into a
// single-channel float image, scale to 8-bit, and write it to disk.
void postprocessResults(float *gpu_output,const nvinfer1::Dims &dims,int batch_size)
{
    // copy results from GPU to CPU
    std::vector<float> cpu_output(getSizeByDim(dims) * batch_size);
    // fixed casing: the CUDA runtime symbols are cudaMemcpy and
    // cudaMemcpyDeviceToHost (the paste's lowercase forms do not compile)
    cudaMemcpy(cpu_output.data(), gpu_output, cpu_output.size() * sizeof(float), cudaMemcpyDeviceToHost);
    // NHWC output: d[1] = rows, d[2] = cols -- TODO confirm against the engine
    auto rows = dims.d[1];
    auto cols = dims.d[2];
    // single-channel float matrix backed by the copied data
    cv::Mat Finalmat = cv::Mat(rows, cols, CV_32FC1);
    memcpy(Finalmat.data, cpu_output.data(), cpu_output.size() * sizeof(float));
    // scale [0,1] -> [0,255]; output is 1-channel, so CV_8UC1 (the paste used
    // CV_8UC3, which mismatches the single-channel buffer)
    Finalmat.convertTo(Finalmat, CV_8UC1, 255.0);
    cv::imwrite("trt_output.bmp", Finalmat);
}
// initialize TensorRT Context and Engine and parse ONNX model --------------------------------------------------------------------
// Parse an ONNX model and build a TensorRT engine plus execution context.
// Fixes vs. the paste: corrected NetworkDefinitionCreationFlag /
// INetworkDefinition casing (the DeFinition spellings do not compile),
// reconstructed the two garbled optimization-profile setDimensions calls
// (their absence is what produces "dynamic input is missing dimensions in
// profile 0"), wrapped builder/network so they are released, and checked the
// engine before creating a context.
void parSEOnnxModel(const std::string& model_path,TRTUniquePtr<nvinfer1::ICudaEngine>& engine,TRTUniquePtr<nvinfer1::IExecutionContext>& context)
{
    TRTUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(gLogger)};
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    TRTUniquePtr<nvinfer1::INetworkDefinition> network{builder->createNetworkV2(explicitBatch)};
    TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network,gLogger)};
    // parse ONNX
    if (!parser->parseFromFile(model_path.c_str(),static_cast<int>(nvinfer1::ILogger::Severity::kINFO)))
    {
        std::cerr << "ERROR: Could not parse the model.\n";
        return;
    }
    // create Config to configure engine parameters such as max memory or FP16 mode
    TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
    // The exported model has dynamic dimensions, so every profile selector
    // (kMIN/kOPT/kMAX) must be given fully-specified input shapes. The model
    // is NHWC with a 1024x1024x1 input; the Keras input tensor is named
    // "input_1" -- TODO confirm against the actual ONNX graph (the simplified
    // model in the question was named "input_2").
    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    profile->setDimensions("input_1", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(1, 1024, 1024, 1));
    profile->setDimensions("input_1", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4(1, 1024, 1024, 1));
    profile->setDimensions("input_1", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4(1, 1024, 1024, 1));
    config->addOptimizationProfile(profile);
    // allow TensorRT to use up to 1GB of GPU memory for tactic selection
    config->setMaxWorkspaceSize(1ULL << 30);
    // use FP16 mode if possible
    if (builder->platformHasFastFp16())
    {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    // we have only one image in batch
    builder->setMaxBatchSize(1);
    // generate a TensorRT engine optimized for the target platform
    engine.reset(builder->buildEngineWithConfig(*network,*config));
    if (!engine)
    {
        // without this check the caller dereferences a null context and segfaults
        std::cerr << "ERROR: engine build failed.\n";
        return;
    }
    context.reset(engine->createExecutionContext());
}
// main pipeline ------------------------------------------------------------------------------------------------------
int main(int argc,char* argv[])
{
if (argc < 3)
{
std::cerr << "usage: " << argv[0] << " [model_name].onnx [image_name].jpg\n";
return -1;
}
std::string model_path(argv[1]);
std::string image_path(argv[2]);
int batch_size = 1;
TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
parSEOnnxModel(model_path,engine,context);
// get sizes of input and output and allocate memory required for input data and for output data
std::vector<nvinfer1::Dims> input_dims; // we expect only one input
std::vector<nvinfer1::Dims> output_dims; // and one output
context->setBindingDimensions(0,1));
//getNbBindings is number of binding indices
// buffers for input and output data
std::vector<void*> buffers(engine->getNbBindings());
for (size_t i = 0; i < engine->getNbBindings(); ++i)
{
if (engine->bindingIsInput(i))
{
input_dims.emplace_back(context->getBindingDimensions(i));
}
else
{
output_dims.emplace_back(context->getBindingDimensions(i));
}
auto binding_size = getSizeByDim(context->getBindingDimensions(i)) * batch_size * sizeof(float);
auto status = cudamalloc(&buffers[i],binding_size);
if (status)
std::cout<<"cudamalloc worked\n";
}
if (input_dims.empty() || output_dims.empty())
{
std::cerr << "Expect at least one input and one output for network\n";
return -1;
}
preprocessImage(image_path,(float *) buffers[0],input_dims[0]);
postprocessResults((float *) buffers[1],output_dims[0],batch_size);
for (void* buf : buffers)
{
cudaFree(buf);
}
return 0;
}
然后我尝试用 readNetFromTensorflow 打开 pb 文件。我在 /workspace 和 /workspace/owlSimple/ 下都有 saved_model.pb。在 /workspace 目录中,我尝试过:
dnn:Net nt = cv::dnn::readNetFromTensorflow("saved_model.pb");
dnn:Net nt = cv::dnn::readNetFromTensorflow("owlSimple/saved_model.pb");
dnn:Net nt = cv::dnn::readNetFromTensorflow("./owlSimple/saved_model.pb");
dnn:Net nt = cv::dnn::readNetFromTensorflow("/workspace/owlSimple/saved_model.pb");
dnn:Net nt = cv::dnn::readNetFromTensorflow("/owlSimple/saved_model.pb");
但他们都给出了相同的错误
String field 'opencv_tensorflow.FunctionDef.Node.ret' contains invalid UTF-8 data when parsing a protocol buffer. Use the 'bytes' type if you intend to send raw bytes.
what(): OpenCV(4.5.1-pre) /workspace/opencv/modules/dnn/src/tensorflow/tf_io.cpp:42: error: (-2:Unspecified error) Failed: ReadProtoFromBinaryFile(param_file,param). Failed to parse GraphDef file: owlSimple/saved_model.pb in function 'ReadTFNetParamsFromBinaryFileOrDie'
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。