TensorRT not using float16, or how to check?


I strongly suspect that precision_mode='FP16' does nothing (TF 1.15). The size of the .pb file does not change, but after reading this question I learned that the weights may still be stored as float32 while the computation runs in float16, so I tried to check the tensors.
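
One way to do that check (a minimal sketch, run against the converted trt_model.pb produced below, or against model.pb) is to walk the frozen GraphDef and print the stored dtype of every Const node, i.e. of the frozen weights:

import tensorflow as tf

# Minimal sketch: print the stored dtype of every Const node (the frozen
# weights) in a .pb file; float16 weights would show up as <dtype: 'float16'>.
graph_def = tf.GraphDef()
with tf.gfile.GFile('trt_model.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

for node in graph_def.node:
    if node.op == 'Const':
        print(node.name, tf.DType(node.attr['dtype'].type))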

Here we create the Keras model:

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
import numpy as np

from tensorflow.python.platform import gfile
from tensorflow.python.framework import graph_io



inp = keras.layers.Input(shape=(None,None,3))
x = keras.layers.Conv2D(64,3,padding='same')(inp)
out = keras.layers.Conv2D(3,3,padding='same')(x)
model = keras.Model([inp],[out])

model.compile(optimizer='adam',loss='mse')


input_name = model.inputs[0].name
output_name = model.outputs[0].name



print(input_name)
print(output_name)
'''
input_1:0
conv2d_1/BiasAdd:0
'''


# -------------------- SAVING

sess = K.get_session()

output_name = output_name.split(":")[0]

with sess.graph.as_default() as graph:

    input_graph_def = graph.as_graph_def()

    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,# The session
        input_graph_def,# input_graph_def is useful for retrieving the nodes
        output_node_names = [output_name]) #[node.name for node in input_graph_def.node]  )

    #write the graph
    graph_io.write_graph(output_graph_def,'','model.pb',as_text=False)
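
As a quick sanity check (not part of the original code), the frozen graph can be inspected to confirm that the output node survived freezing:

# Sketch: list the frozen nodes and make sure the output node is among them.
frozen_names = [node.name for node in output_graph_def.node]
print(frozen_names)
assert output_name in frozen_names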

Then we convert it with TensorRT, using precision_mode='FP16':

import tensorflow as tf
#from tensorflow.contrib import tensorrt as trt
from tensorflow.python.compiler.tensorrt import trt_convert as trt

tf.flags.DEFINE_bool('use_float16',True,'Whether we want to quantize it to float16.')

output_names = ['conv2d_1/BiasAdd']

def load_graph(file):
   with tf.gfile.GFile(file,'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
      tf.import_graph_def(graph_def)
      return graph,graph_def


graph,graph_def = load_graph('model.pb')
tensorrt_graph = trt.create_inference_graph(graph_def,outputs=output_names,max_batch_size=1,precision_mode='FP16')
with tf.gfile.GFile('trt_model.pb','wb') as f:
    f.write(tensorrt_graph.SerializeToString())

Conversion log:

2020-10-21 15:54:14.659757: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1304] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3693 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1)
2020-10-21 15:54:14.661494: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x562666640c80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-10-21 15:54:14.661507: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): GeForce GTX 1050, Compute Capability 6.1
2020-10-21 15:54:14.669536: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:786] Optimization results for grappler item: tf_graph
2020-10-21 15:54:14.669560: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788]   constant_folding: Graph size after: 9 nodes (-4), 8 edges (-4), time = 1.469ms.
2020-10-21 15:54:14.669569: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788]   layout: Graph size after: 13 nodes (4), 12 edges (4), time = 0.588ms.
2020-10-21 15:54:14.669575: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788]   constant_folding: Graph size after: 13 nodes (0), 12 edges (0), time = 1.32ms.
2020-10-21 15:54:14.669582: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788]   constant_folding: Graph size after: 13 nodes (0), time = 0.784ms.
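
Note that this log only shows grappler's constant_folding and layout passes; there is no line about TensorRT engine creation. A more direct check (a sketch, assuming the TRTEngineOp node layout of TF 1.15's TF-TRT) is to count TRTEngineOp nodes in the returned graph and read their precision_mode attribute; a count of zero would mean the converter left the graph unchanged, which would also explain why the .pb size does not change:

# Sketch: a successfully converted graph contains TRTEngineOp nodes whose
# precision_mode attribute (attribute name assumed from the TF 1.15 op
# definition) records the requested precision.
trt_engine_nodes = [n for n in tensorrt_graph.node if n.op == 'TRTEngineOp']
print('TRTEngineOp nodes found:', len(trt_engine_nodes))
for n in trt_engine_nodes:
    print(n.name, n.attr['precision_mode'].s)

If no engines were built, one knob worth trying is minimum_segment_size (create_inference_graph defaults it to 3): a toy graph this small may not contain a segment the converter considers worth wrapping.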

Then we load it and print the dtypes of the tensors:

from PIL import Image
import numpy as np

import tensorflow as tf

from tensorflow.core.framework import types_pb2,graph_pb2,attr_value_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format

#tf.flags.DEFINE_bool('use_float16','Whether we want to quantize it to float16.')


def load_graph(model_path):
    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.GraphDef()
        if model_path.endswith("pb"):
            with open(model_path,"rb") as f:
                graph_def.ParseFromString(f.read())
        else:
            with open(model_path,"r") as pf:
                text_format.Parse(pf.read(),graph_def)
        tf.import_graph_def(graph_def,name="")
        sess = tf.Session(graph=graph)
        return sess,graph


sess,graph = load_graph('trt_model.pb')

input_name = 'input_1:0'
output_name = 'conv2d_1/BiasAdd:0'

print('---------------Done---------------')

#Test model



test_img_orig = Image.open('test.jpg').convert('RGB')
test_img_orig = (np.array(test_img_orig)/255.).astype(np.float16)

print(sess.graph.get_tensor_by_name(input_name))
print(sess.graph.get_tensor_by_name(output_name))


output_tensor = sess.graph.get_tensor_by_name(output_name)
output = sess.run(output_tensor,{input_name: test_img_orig[np.newaxis,...]})

print(sess.graph.get_tensor_by_name(input_name))
print(sess.graph.get_tensor_by_name(output_name))

The result is

Tensor("input_1:0",shape=(?,?,3),dtype=float32)
Tensor("conv2d_1/BiasAdd:0",dtype=float32)

This means the model is still float32. How can I get TensorRT to quantize the model to float16?
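
Two caveats, both my reading of TF-TRT rather than something the output above proves. First, TF-TRT keeps the graph's input and output tensors in float32 even in FP16 mode, because the casts to float16 happen inside the TRTEngineOp; printing the dtype of input_1:0 and conv2d_1/BiasAdd:0 can therefore neither confirm nor rule out FP16 execution. Second, TF 1.14+ also exposes the conversion through a class-based API; a minimal sketch for this toy model (parameter values other than precision_mode are assumptions):

from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Sketch using the TrtGraphConverter class (TF 1.14+). graph_def is the
# frozen GraphDef loaded from model.pb; minimum_segment_size is lowered
# because the toy graph has very few nodes.
converter = trt.TrtGraphConverter(
    input_graph_def=graph_def,
    nodes_blacklist=['conv2d_1/BiasAdd'],  # output nodes are excluded from conversion
    max_batch_size=1,
    minimum_segment_size=2,
    precision_mode='FP16')
trt_graph = converter.convert()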
