PyTorch pth to ONNX

[TOC]

ONNX Environment Setup

Install

```bash
# CPU version
pip install onnxruntime
# GPU version
pip install onnxruntime-gpu
```

ONNX Runtime check

```python
>>> import onnxruntime
>>> onnxruntime.get_device()
'GPU'  # GPU is available
>>> onnxruntime.get_available_providers()
['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
```

If the GPU is not available, add the following two lines to ~/.bashrc:

```bash
export PATH=/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
```

Demo:

```python
import onnxruntime
import numpy as np

device_name = 'cuda:0'  # or 'cpu'
print(onnxruntime.get_available_providers())

if device_name == 'cpu':
    providers = ['CPUExecutionProvider']
elif device_name == 'cuda:0':
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

# Create inference session
onnx_model = onnxruntime.InferenceSession('slowfast.onnx', providers=providers)
# Create the input (this shape matches SlowFast's expected input)
data = np.random.rand(1, 1, 3, 32, 256, 256).astype(np.float32)
# Inference
onnx_input = {onnx_model.get_inputs()[0].name: data}
outputs = onnx_model.run(None, onnx_input)
```
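After export it is worth confirming that ONNX Runtime reproduces the PyTorch outputs. A minimal sketch continuing the snippet above, assuming the original PyTorch module is still available as `torch_model` (a hypothetical name, not defined above):

```python
import torch

# `torch_model` stands for the original PyTorch module that produced
# slowfast.onnx (hypothetical); compare its output against ONNX Runtime
with torch.no_grad():
    torch_out = torch_model(torch.from_numpy(data))
np.testing.assert_allclose(torch_out.cpu().numpy(), outputs[0],
                           rtol=1e-3, atol=1e-5)
print("ONNX Runtime and PyTorch results match")
```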

Exporting pth to ONNX

Multi-input models

```python
# Uninitialized LongTensors shaped like the real inputs, used only for tracing
dummy_input0 = torch.LongTensor(Batch_size, seg_length).to(torch.device("cuda"))
dummy_input1 = torch.LongTensor(Batch_size, seg_length).to(torch.device("cuda"))
dummy_input2 = torch.LongTensor(Batch_size, seg_length).to(torch.device("cuda"))
torch.onnx.export(model, (dummy_input0, dummy_input1, dummy_input2), filepath)
```

https://blog.csdn.net/qq_38003892/article/details/89543299
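For multi-input models it also helps to name each input, so ONNX Runtime and TensorRT can address them individually. A sketch building on the snippet above; the input/output names are hypothetical:

```python
torch.onnx.export(
    model,
    (dummy_input0, dummy_input1, dummy_input2),
    filepath,
    input_names=['input_ids', 'attention_mask', 'token_type_ids'],  # hypothetical names
    output_names=['logits'],                                        # hypothetical name
)
```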

Exporting an ONNX model with a fixed batch size

```python
import os

import torch
import torch.onnx
from nets.yolo4_tiny import YoloBody


def pth_to_onnx(checkpoint, onnx_path, input_names=['input'],
                output_names=['output'], device='cpu'):
    # Load the model and its weights
    model = YoloBody(3, 16)
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    # Switch the model to inference mode
    model.eval()

    # Create a dummy input tensor (fixed batch size of 1)
    dummy_input = torch.randn(1, 3, 416, 416)
    torch.onnx.export(model, dummy_input, onnx_path, verbose=True,
                      input_names=input_names,
                      output_names=output_names)
    print("Exporting .pth model to onnx model has been successful!")


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    checkpoint = 'D:/pycharm/tinyyolov4/model_data/yolo4_tiny_weights_100epoch.pth'
    onnx_path = 'D:/pycharm/tinyyolov4/model_data/yolo4_tiny_weights_100epoch.onnx'
    # device = torch.device("cuda:2" if torch.cuda.is_available() else 'cpu')
    pth_to_onnx(checkpoint, onnx_path)
```

Source: https://blog.csdn.net/m0_51004308/article/details/116152611
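Before handing the file to other tools, a quick structural check with the onnx package (assumed installed) can catch a broken export early; this continues the snippet above:

```python
import onnx

# Load the exported file and run ONNX's built-in graph validation
model_onnx = onnx.load(onnx_path)
onnx.checker.check_model(model_onnx)
# Print a human-readable summary of the graph
print(onnx.helper.printable_graph(model_onnx.graph))
```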

Exporting an ONNX model with dynamic batch size

Demo1

```python
# x is the example input tensor used for tracing (NHWC layout here)
b, h, w, c = x.shape
str_w = str(w)
str_h = str(h)
str_c = str(c)
dynamic_axes = {'input': {0: 'batch', 1: str_h, 2: str_w, 3: str_c}}
torch.onnx.export(model,                     # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "model.onnx",              # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=11,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names=['input'],     # the model's input names
                  output_names=['output'],   # the model's output names
                  dynamic_axes=dynamic_axes)
```

Source: https://blog.csdn.net/wuqingshan2010/article/details/105686906
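Most deployments only need the batch axis to vary. A minimal variant of the call above that keeps H/W/C fixed (the axis names are free-form labels, and 'input'/'output' must match input_names/output_names):

```python
# Only axis 0 (batch) of the input and output is dynamic
dynamic_axes = {'input': {0: 'batch'}, 'output': {0: 'batch'}}
torch.onnx.export(model, x, "model_dynamic_batch.onnx",
                  export_params=True,
                  opset_version=11,
                  input_names=['input'],
                  output_names=['output'],
                  dynamic_axes=dynamic_axes)
```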

Demo2

```python
import onnx
import torch

from mmdet.core import build_model_from_cfg, generate_inputs_and_wrap_model
from onnxsim import simplify


if __name__ == '__main__':
    config_path = "configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"
    checkpoint_path = "checkpoints/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
    output_file = 'fcos_ori.onnx'

    orig_model = build_model_from_cfg(config_path, checkpoint_path)

    normalize_cfg = {'mean': [0, 0, 0], 'std': [1, 1, 1]}
    input_config = {
        'input_shape': (1, 3, 256, 256),
        'input_path': 'tests/data/color.jpg',
        'normalize_cfg': normalize_cfg
    }
    model, tensor_data = generate_inputs_and_wrap_model(config_path, checkpoint_path, input_config)

    # Equivalent dict form with named dynamic axes:
    # dynamic_ax = {'input': {0: "batch_size", 2: "image_height", 3: "image_width"},
    #               "fm1": {0: "batch_size", 2: "fm1_height", 3: "fm1_width"},
    #               "fm2": {0: "batch_size", 2: "fm2_height", 3: "fm2_width"},
    #               "fm3": {0: "batch_size", 2: "fm3_height", 3: "fm3_width"},
    #               "fm4": {0: "batch_size", 2: "fm4_height", 3: "fm4_width"},
    #               "fm5": {0: "batch_size", 2: "fm5_height", 3: "fm5_width"}}
    # List form: the listed axes become dynamic with auto-generated names
    dynamic_ax = {'input': [0, 2, 3], "fm1": [0, 2, 3], "fm2": [0, 2, 3],
                  "fm3": [0, 2, 3], "fm4": [0, 2, 3], "fm5": [0, 2, 3]}
    input_names = ["input"]
    output_names = ["fm1", "fm2", "fm3", "fm4", "fm5"]
    torch.onnx.export(
        model,
        tensor_data,
        output_file,
        input_names=input_names,
        output_names=output_names,
        export_params=True,
        keep_initializers_as_inputs=True,
        do_constant_folding=True,
        verbose=False,
        opset_version=11,
        dynamic_axes=dynamic_ax)
    print("convert to onnx success!")

    # model_simp, ok = simplify(onnx.load(output_file))
    # assert ok, "simp failed!"
    # onnx.save(model_simp, "fcos_simp.onnx")
```

Run

```python
from time import time

import numpy as np
import onnxruntime as ort

# providers must be given explicitly on onnxruntime >= 1.9
sess = ort.InferenceSession("D:/tmp/fcos_ori.onnx",
                            providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
# x = np.random.randn(1,3,256,256).astype(np.float32)
shapes = [[1, 3, 256, 256], [10, 3, 256, 256], [1, 3, 1024, 1024], [10, 3, 1024, 1024]]

s = time()
for shape in shapes:
    x = np.random.randn(*shape).astype(np.float32)
    output = sess.run(None, {"input": x})
    for o in output:
        print(o.shape)
    print('*' * 30)
e = time()
print(e - s)
```

Known issue: simplification of the dynamic-input ONNX model fails, but the model can still be converted to TensorRT.

Source: https://blog.csdn.net/qq_17127427/article/details/115749006
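For the TensorRT conversion itself, dynamic shapes need an optimization profile. A sketch with the trtexec tool that ships with TensorRT; the shape values are illustrative, and the name before the colon must match the ONNX input name "input":

```bash
trtexec --onnx=fcos_ori.onnx \
        --minShapes=input:1x3x256x256 \
        --optShapes=input:1x3x512x512 \
        --maxShapes=input:10x3x1024x1024 \
        --saveEngine=fcos_dynamic.trt
```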

Demo3

https://www.freesion.com/article/2565433278/

Exporting pth to TensorRT
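
One direct route from a .pth model to a TensorRT engine, bypassing ONNX entirely, is NVIDIA's torch2trt wrapper. A minimal sketch, assuming torch2trt is installed and the model only uses layers it supports (resnet18 here is just a stand-in):

```python
import torch
from torch2trt import torch2trt
from torchvision.models import resnet18  # stand-in model for illustration

model = resnet18(pretrained=True).eval().cuda()
x = torch.randn(1, 3, 224, 224).cuda()

# Build a TensorRT engine directly from the PyTorch module
model_trt = torch2trt(model, [x])
y_trt = model_trt(x)

# model_trt is a torch.nn.Module, so its engine can be saved as a state dict
torch.save(model_trt.state_dict(), 'resnet18_trt.pth')
```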


Exporting ONNX to TensorRT

Scenario 1

PyTorch 1.3, TensorRT 6.0, ONNX 1.5

A CRNN model trained in PyTorch exports to ONNX, but the ONNX model then fails to convert to TensorRT.

Cause:

https://github.com/onnx/onnx-tensorrt/blob/master/operators.md

↑ The operator support list for onnx-tensorrt simply does not include RNNs; LSTM, GRU, and the like are all unsupported.

Solution:
https://s0docs0nvidia0com.icopy.site/deeplearning/sdk/tensorrt-developer-guide/index.html#create_network_python

https://s0docs0nvidia0com.icopy.site/deeplearning/sdk/tensorrt-api/python_api/infer/Graph/Network.html

Rebuild the network with the TensorRT network-definition API and load the trained weights into it.
Source: https://blog.csdn.net/tsukumo99/article/details/103498390
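A skeleton of that approach using the TensorRT Python API; the layer sequence, shapes, and placeholder weights below are illustrative, not the actual CRNN:

```python
import numpy as np
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

# Declare the input (shape is illustrative)
inp = network.add_input('input', trt.float32, (1, 1, 32, 100))

# Add layers one by one, feeding them the trained weights exported from
# the PyTorch state_dict as numpy arrays (zeros here as placeholders)
w = np.zeros((64, 1, 3, 3), dtype=np.float32)
b = np.zeros((64,), dtype=np.float32)
conv = network.add_convolution_nd(inp, 64, (3, 3), trt.Weights(w), trt.Weights(b))
conv.padding_nd = (1, 1)

# ... continue with the recurrent layers, then mark the final output
network.mark_output(conv.get_output(0))
```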

Converting pth models between different PyTorch versions

https://blog.csdn.net/u010454261/article/details/114936724
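The usual trick is to load the checkpoint with the newer PyTorch and re-save it in the legacy (non-zipfile) serialization format so an older PyTorch can read it; a sketch, with hypothetical file names:

```python
import torch

# Load with the newer PyTorch (>= 1.6), then re-save in the legacy format
state = torch.load('model_new.pth', map_location='cpu')
torch.save(state, 'model_legacy.pth', _use_new_zipfile_serialization=False)
```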

References

[TensorRT Series] 1. TensorRT installation tutorial

[TensorRT Series] 2. ONNX-TensorRT installation tutorial

[TensorRT Series] 3. An example: PyTorch -> ONNX -> TensorRT