From 3e418bb2dc65fa5bf5c7e22ab5f3b9dca0cd0ecd Mon Sep 17 00:00:00 2001
From: Jetson
Date: Wed, 18 Jan 2023 16:01:11 +0800
Subject: [PATCH] feat: TensorRT model & inference

---
 TensorRT_Model/convert_model.py |  70 ++++++++++++
 common.py                       | 184 ++++++++++++++++++++++++++++++++
 inference_rt.py                 |  45 ++++++++
 3 files changed, 299 insertions(+)
 create mode 100644 TensorRT_Model/convert_model.py
 create mode 100644 common.py
 create mode 100644 inference_rt.py

diff --git a/TensorRT_Model/convert_model.py b/TensorRT_Model/convert_model.py
new file mode 100644
index 0000000..6e2259c
--- /dev/null
+++ b/TensorRT_Model/convert_model.py
@@ -0,0 +1,70 @@
+import multiprocessing
+
+keras_path = '../model_without_preprocess_finetuned.h5'
+onnx_path = 'model.onnx'
+
+
+def keras2onnx():
+    """Load the Keras model at `keras_path` and save it as ONNX at `onnx_path`."""
+    import os
+
+    # Set before importing TensorFlow so its native logging picks the level up.
+    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+    import onnx
+    import onnxmltools
+    import tensorflow as tf
+    from tensorflow import keras
+
+    print('[*] Converting Keras Model to onnx')
+    tf.get_logger().setLevel('ERROR')
+    keras_model = keras.models.load_model(keras_path)
+    onnx_model = onnxmltools.convert_keras(keras_model)
+
+    # One sample at a time: set the leading dimension of both inputs and the output to 1.
+    onnx_model.graph.input[0].type.tensor_type.shape.dim[0].dim_value = 1
+    onnx_model.graph.input[1].type.tensor_type.shape.dim[0].dim_value = 1
+    onnx_model.graph.output[0].type.tensor_type.shape.dim[0].dim_value = 1
+    onnx.checker.check_model(onnx_model)
+    onnx.save(onnx_model, onnx_path)
+    print(f'[*] onnx file saved as {onnx_path}')
+
+
+def onnx2rt():
+    """Build a serialized TensorRT engine from `onnx_path`; exit non-zero on failure."""
+    import tensorrt as trt
+
+    TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
+    EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    trt_file = '../model.trt'
+
+    print('[*] Converting onnx to tensorrt')
+    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
+        with open(onnx_path, 'rb') as f:
+            if not parser.parse(f.read()):
+                print('ERROR: Failed to parse the ONNX file.')
+                for error in range(parser.num_errors):
+                    print(parser.get_error(error))
+                # Do not build an engine from a network the parser rejected.
+                raise SystemExit(1)
+
+        config = builder.create_builder_config()
+        config.add_optimization_profile(builder.create_optimization_profile())
+
+        # Build first and check the result, so a failed build never leaves an empty file.
+        serialized = builder.build_serialized_network(network, config)
+        if serialized is None:
+            raise SystemExit('ERROR: Failed to build the TensorRT engine.')
+        with open(trt_file, "wb") as f:
+            f.write(serialized)
+    print(f'[*] tensorrt file saved as {trt_file}')
+
+
+if __name__ == "__main__":
+    # Run the two stages in separate processes, one after the other; stop at the first failure.
+    for stage in (keras2onnx, onnx2rt):
+        proc = multiprocessing.Process(target=stage)
+        proc.start()
+        proc.join()
+        if proc.exitcode != 0:
+            raise SystemExit(proc.exitcode)
diff --git a/common.py b/common.py
new file mode 100644
index 0000000..2af1547
--- /dev/null
+++ b/common.py
@@ -0,0 +1,184 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import os
+
+import numpy as np
+import pycuda.autoinit
+import pycuda.driver as cuda
+import tensorrt as trt
+
+try:
+    # Sometimes python does not understand FileNotFoundError
+    FileNotFoundError
+except NameError:
+    FileNotFoundError = IOError
+
+EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+
+
+def GiB(val):
+    return val * 1 << 30  # parses as (val * 1) << 30
+
+
+def add_help(description):
+    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    args, _ = parser.parse_known_args()  # the parsed result is not returned or used
+
+
+def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[], err_msg=""):
+    """
+    Parses the -d/--datadir sample arguments and locates the requested files.
+
+    Args:
+        description (str): Description of the sample.
+        subfolder (str): The subfolder containing data relevant to this sample
+        find_files (List[str]): A list of filenames to find; err_msg is passed on to locate_files.
+
+    Returns:
+        Tuple[List[str], List[str]]: The data directories, and the absolute paths of find_files.
+    """
+
+    # Standard command-line arguments for all samples.
+    kDEFAULT_DATA_ROOT = os.path.join(os.sep, "usr", "src", "tensorrt", "data")
+    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        "-d",
+        "--datadir",
+        help="Location of the TensorRT sample data directory, and any additional data directories.",
+        action="append",
+        default=[kDEFAULT_DATA_ROOT],  # with action="append", -d values are added after this default
+    )
+    args, _ = parser.parse_known_args()
+
+    def get_data_path(data_dir):
+        # If the subfolder exists, append it to the path, otherwise use the provided path as-is.
+        data_path = os.path.join(data_dir, subfolder)
+        if not os.path.exists(data_path):
+            if data_dir != kDEFAULT_DATA_ROOT:
+                print("WARNING: " + data_path + " does not exist. Trying " + data_dir + " instead.")
+            data_path = data_dir
+        # Make sure data directory exists.
+        if not (os.path.exists(data_path)) and data_dir != kDEFAULT_DATA_ROOT:
+            print(
+                "WARNING: {:} does not exist. Please provide the correct data path with the -d option.".format(
+                    data_path
+                )
+            )
+        return data_path
+
+    data_paths = [get_data_path(data_dir) for data_dir in args.datadir]
+    return data_paths, locate_files(data_paths, find_files, err_msg)
+
+
+def locate_files(data_paths, filenames, err_msg=""):
+    """
+    Locates the specified files in the specified data directories.
+    If a file exists in multiple data directories, the first directory is used.
+
+    Args:
+        data_paths (List[str]): The data directories.
+        filenames (List[str]): The names of the files to find.
+
+    Returns:
+        List[str]: The absolute paths of the files.
+
+    Raises:
+        FileNotFoundError if a file could not be located; err_msg is appended to the message.
+    """
+    found_files = [None] * len(filenames)
+    for data_path in data_paths:
+        # Find all requested files.
+        for index, (found, filename) in enumerate(zip(found_files, filenames)):
+            if not found:
+                file_path = os.path.abspath(os.path.join(data_path, filename))
+                if os.path.exists(file_path):
+                    found_files[index] = file_path
+
+    # Check that all files were found
+    for f, filename in zip(found_files, filenames):
+        if not f or not os.path.exists(f):
+            raise FileNotFoundError(
+                "Could not find {:}. Searched in data paths: {:}\n{:}".format(filename, data_paths, err_msg)
+            )
+    return found_files
+
+
+# Simple helper data class that's a little nicer to use than a 2-tuple.
+class HostDeviceMem(object):
+    def __init__(self, host_mem, device_mem):
+        self.host = host_mem
+        self.device = device_mem
+
+    def __str__(self):
+        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
+
+    def __repr__(self):
+        return self.__str__()
+
+
+# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
+def allocate_buffers(engine):
+    inputs = []
+    outputs = []
+    bindings = []
+    stream = cuda.Stream()
+    for binding in engine:
+        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
+        dtype = trt.nptype(engine.get_binding_dtype(binding))
+        # Allocate host and device buffers
+        host_mem = cuda.pagelocked_empty(size, dtype)
+        device_mem = cuda.mem_alloc(host_mem.nbytes)
+        # Append the device buffer to device bindings.
+        bindings.append(int(device_mem))
+        # Append to the appropriate list.
+        if engine.binding_is_input(binding):
+            inputs.append(HostDeviceMem(host_mem, device_mem))
+        else:
+            outputs.append(HostDeviceMem(host_mem, device_mem))
+    return inputs, outputs, bindings, stream
+
+
+# This function is generalized for multiple inputs/outputs.
+# inputs and outputs are expected to be lists of HostDeviceMem objects.
+def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
+    # Transfer input data to the GPU.
+    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
+    # Run inference.
+    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
+    # Transfer predictions back from the GPU.
+    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
+    # Synchronize the stream
+    stream.synchronize()
+    # Return only the host outputs.
+    return [out.host for out in outputs]
+
+
+# This function is generalized for multiple inputs/outputs for full dimension networks.
+# inputs and outputs are expected to be lists of HostDeviceMem objects.
+def do_inference_v2(context, bindings, inputs, outputs, stream):
+    # Transfer input data to the GPU.
+    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
+    # Run inference.
+    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
+    # Transfer predictions back from the GPU.
+    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
+    # Synchronize the stream
+    stream.synchronize()
+    # Return only the host outputs.
+    return [out.host for out in outputs]
diff --git a/inference_rt.py b/inference_rt.py
new file mode 100644
index 0000000..b88163e
--- /dev/null
+++ b/inference_rt.py
@@ -0,0 +1,45 @@
+labels = ['can', 'paper_cup', 'paper_box', 'paper_milkbox', 'plastic']
+
+print("[*] Importing packages...")
+import common
+import tensorrt as trt
+import os
+import cv2
+import pandas as pd
+import numpy as np
+
+TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+
+print("[*] Loading model...")
+
+# load trt engine
+trt_path = 'model.trt'
+with open(trt_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
+    engine = runtime.deserialize_cuda_engine(f.read())
+    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+
+if __name__ == '__main__':
+    def pred(f, dirpath):
+        """Classify image `f` in `dirpath` (with its CSV weight) and return the label."""
+        path = os.path.join(dirpath, f)
+        img = cv2.imread(path)
+        if img is None:  # cv2.imread returns None instead of raising
+            raise FileNotFoundError(f'could not read image: {path}')
+        weight = df.loc[f]['weight']
+
+        # Fill the page-locked buffers from allocate_buffers() in place; rebinding
+        # `.host` would discard them and skip the size check that np.copyto gives.
+        np.copyto(inputs[0].host, img.astype('float32').ravel())
+        np.copyto(inputs[1].host, np.asarray(weight, dtype='float32').ravel())
+
+        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
+        return labels[trt_outputs[0].argmax(-1)]
+
+    df = pd.read_csv('test_data/weights_test.csv').set_index('name')
+
+    # One execution context serves every prediction.
+    with engine.create_execution_context() as context:
+        for dirpath, dirnames, filenames in os.walk('test_data'):
+            for f in filenames:
+                if f.endswith('.jpg'):
+                    print(f'{f}: {pred(f, dirpath)}')