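# Sample: build a TensorRT engine from a UFF-converted ResNet50 model and
# classify one of the bundled test images. Requires TensorRT with the UFF
# parser, PyCUDA, Pillow, NumPy, and the `common` helper module shipped with
# the TensorRT Python samples.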
import random

from PIL import Image
import numpy as np

import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

import sys, os

# Make the shared `common` helper module from the parent samples directory importable.
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common


class ModelData(object):
    MODEL_PATH = "resnet50-infer-5.uff"
    INPUT_NAME = "input"
    INPUT_SHAPE = (3, 224, 224)
    OUTPUT_NAME = "GPU_0/tower_0/Softmax"
    DTYPE = trt.float32

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
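
# Allocate page-locked host buffers and device buffers for the engine's input
# and output bindings, plus a CUDA stream for asynchronous transfers.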
def allocate_buffers(engine):
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

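# Copy the input to the GPU, run inference asynchronously on the stream,
# copy the result back to the host, and wait for the stream to finish.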
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    cuda.memcpy_htod_async(d_input, h_input, stream)
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()

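# Parse the UFF model and build a TensorRT engine with a 1 GiB workspace.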
def build_engine_uff(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        return builder.build_cuda_engine(network)

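# Resize the test image to the network input resolution, convert HWC -> CHW,
# flatten it, and copy it into the page-locked input buffer.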
def load_normalized_test_case(test_image, pagelocked_buffer):
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()

    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

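# Locate the sample data, build the engine, run inference on a randomly chosen
# test image, and check the prediction against the image's file name.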
def main():
    data_path, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH, "class_labels.txt"],
    )
    test_images = data_files[0:3]
    uff_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')

    with build_engine_uff(uff_model_file) as engine:
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        with engine.create_execution_context() as context:
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # The label with the highest softmax score is the prediction.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)


if __name__ == '__main__':
    main()