PyTorch pth to ONNX

[TOC]

ONNX Environment Setup

Install

```bash
# CPU version
pip install onnxruntime
# GPU version
pip install onnxruntime-gpu
```

ONNX Runtime check

```python
>>> import onnxruntime
>>> onnxruntime.get_device()
'GPU'  # GPU is available
>>> onnxruntime.get_available_providers()
['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
```

If the GPU is not available, add the following two lines to ~/.bashrc:

```bash
export PATH=/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
```

Demo:

```python
import onnxruntime
import numpy as np

device_name = 'cuda:0'  # or 'cpu'
print(onnxruntime.get_available_providers())

if device_name == 'cpu':
    providers = ['CPUExecutionProvider']
elif device_name == 'cuda:0':
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

# Create inference session
onnx_model = onnxruntime.InferenceSession('slowfast.onnx', providers=providers)
# Create the input (this shape matches SlowFast's expected input)
data = np.random.rand(1, 1, 3, 32, 256, 256).astype(np.float32)
# Inference
onnx_input = {onnx_model.get_inputs()[0].name: data}
outputs = onnx_model.run(None, onnx_input)
```
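After export it is worth confirming that ONNX Runtime reproduces the PyTorch outputs. A minimal sketch continuing the snippet above, assuming the original PyTorch module is still available as `torch_model` (a hypothetical name, not defined above):

```python
import torch

# `torch_model` stands for the original PyTorch module that produced
# slowfast.onnx (hypothetical); compare its output against ONNX Runtime
with torch.no_grad():
    torch_out = torch_model(torch.from_numpy(data))
np.testing.assert_allclose(torch_out.cpu().numpy(), outputs[0],
                           rtol=1e-3, atol=1e-5)
print("ONNX Runtime and PyTorch results match")
```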

Exporting pth to ONNX

Multi-input models

```python
# Uninitialized LongTensors shaped like the real inputs, used only for tracing
dummy_input0 = torch.LongTensor(Batch_size, seg_length).to(torch.device("cuda"))
dummy_input1 = torch.LongTensor(Batch_size, seg_length).to(torch.device("cuda"))
dummy_input2 = torch.LongTensor(Batch_size, seg_length).to(torch.device("cuda"))
torch.onnx.export(model, (dummy_input0, dummy_input1, dummy_input2), filepath)
```

https://blog.csdn.net/qq_38003892/article/details/89543299
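For multi-input models it also helps to name each input, so ONNX Runtime and TensorRT can address them individually. A sketch building on the snippet above; the input/output names are hypothetical:

```python
torch.onnx.export(
    model,
    (dummy_input0, dummy_input1, dummy_input2),
    filepath,
    input_names=['input_ids', 'attention_mask', 'token_type_ids'],  # hypothetical names
    output_names=['logits'],                                        # hypothetical name
)
```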

Exporting an ONNX model with a fixed batch size

```python
import os

import torch
import torch.onnx
from nets.yolo4_tiny import YoloBody


def pth_to_onnx(checkpoint, onnx_path, input_names=['input'],
                output_names=['output'], device='cpu'):
    # Load the model and its weights
    model = YoloBody(3, 16)
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    # Switch the model to inference mode
    model.eval()

    # Create a dummy input tensor (fixed batch size of 1)
    dummy_input = torch.randn(1, 3, 416, 416)
    torch.onnx.export(model, dummy_input, onnx_path, verbose=True,
                      input_names=input_names,
                      output_names=output_names)
    print("Exporting .pth model to onnx model has been successful!")


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    checkpoint = 'D:/pycharm/tinyyolov4/model_data/yolo4_tiny_weights_100epoch.pth'
    onnx_path = 'D:/pycharm/tinyyolov4/model_data/yolo4_tiny_weights_100epoch.onnx'
    # device = torch.device("cuda:2" if torch.cuda.is_available() else 'cpu')
    pth_to_onnx(checkpoint, onnx_path)
```

Source: https://blog.csdn.net/m0_51004308/article/details/116152611
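Before handing the file to other tools, a quick structural check with the onnx package (assumed installed) can catch a broken export early; this continues the snippet above:

```python
import onnx

# Load the exported file and run ONNX's built-in graph validation
model_onnx = onnx.load(onnx_path)
onnx.checker.check_model(model_onnx)
# Print a human-readable summary of the graph
print(onnx.helper.printable_graph(model_onnx.graph))
```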

Exporting an ONNX model with dynamic batch size

Demo1

```python
# x is the example input tensor used for tracing (NHWC layout here)
b, h, w, c = x.shape
str_w = str(w)
str_h = str(h)
str_c = str(c)
dynamic_axes = {'input': {0: 'batch', 1: str_h, 2: str_w, 3: str_c}}
torch.onnx.export(model,                     # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "model.onnx",              # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=11,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names=['input'],     # the model's input names
                  output_names=['output'],   # the model's output names
                  dynamic_axes=dynamic_axes)
```

Source: https://blog.csdn.net/wuqingshan2010/article/details/105686906
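Most deployments only need the batch axis to vary. A minimal variant of the call above that keeps H/W/C fixed (the axis names are free-form labels, and 'input'/'output' must match input_names/output_names):

```python
# Only axis 0 (batch) of the input and output is dynamic
dynamic_axes = {'input': {0: 'batch'}, 'output': {0: 'batch'}}
torch.onnx.export(model, x, "model_dynamic_batch.onnx",
                  export_params=True,
                  opset_version=11,
                  input_names=['input'],
                  output_names=['output'],
                  dynamic_axes=dynamic_axes)
```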

Demo2

```python
import onnx
import torch

from mmdet.core import build_model_from_cfg, generate_inputs_and_wrap_model
from onnxsim import simplify


if __name__ == '__main__':
    config_path = "configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py"
    checkpoint_path = "checkpoints/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth"
    output_file = 'fcos_ori.onnx'

    orig_model = build_model_from_cfg(config_path, checkpoint_path)

    normalize_cfg = {'mean': [0, 0, 0], 'std': [1, 1, 1]}
    input_config = {
        'input_shape': (1, 3, 256, 256),
        'input_path': 'tests/data/color.jpg',
        'normalize_cfg': normalize_cfg
    }
    model, tensor_data = generate_inputs_and_wrap_model(config_path, checkpoint_path, input_config)

    # Equivalent dict form with named dynamic axes:
    # dynamic_ax = {'input': {0: "batch_size", 2: "image_height", 3: "image_width"},
    #               "fm1": {0: "batch_size", 2: "fm1_height", 3: "fm1_width"},
    #               "fm2": {0: "batch_size", 2: "fm2_height", 3: "fm2_width"},
    #               "fm3": {0: "batch_size", 2: "fm3_height", 3: "fm3_width"},
    #               "fm4": {0: "batch_size", 2: "fm4_height", 3: "fm4_width"},
    #               "fm5": {0: "batch_size", 2: "fm5_height", 3: "fm5_width"}}
    # List form: the listed axes become dynamic with auto-generated names
    dynamic_ax = {'input': [0, 2, 3], "fm1": [0, 2, 3], "fm2": [0, 2, 3],
                  "fm3": [0, 2, 3], "fm4": [0, 2, 3], "fm5": [0, 2, 3]}
    input_names = ["input"]
    output_names = ["fm1", "fm2", "fm3", "fm4", "fm5"]
    torch.onnx.export(
        model,
        tensor_data,
        output_file,
        input_names=input_names,
        output_names=output_names,
        export_params=True,
        keep_initializers_as_inputs=True,
        do_constant_folding=True,
        verbose=False,
        opset_version=11,
        dynamic_axes=dynamic_ax)
    print("convert to onnx success!")

    # model_simp, ok = simplify(onnx.load(output_file))
    # assert ok, "simp failed!"
    # onnx.save(model_simp, "fcos_simp.onnx")
```

Run

```python
from time import time

import numpy as np
import onnxruntime as ort

# providers must be given explicitly on onnxruntime >= 1.9
sess = ort.InferenceSession("D:/tmp/fcos_ori.onnx",
                            providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
# x = np.random.randn(1,3,256,256).astype(np.float32)
shapes = [[1, 3, 256, 256], [10, 3, 256, 256], [1, 3, 1024, 1024], [10, 3, 1024, 1024]]

s = time()
for shape in shapes:
    x = np.random.randn(*shape).astype(np.float32)
    output = sess.run(None, {"input": x})
    for o in output:
        print(o.shape)
    print('*' * 30)
e = time()
print(e - s)
```

Known issue: simplification of the dynamic-input ONNX model fails, but the model can still be converted to TensorRT.

Source: https://blog.csdn.net/qq_17127427/article/details/115749006
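For the TensorRT conversion itself, dynamic shapes need an optimization profile. A sketch with the trtexec tool that ships with TensorRT; the shape values are illustrative, and the name before the colon must match the ONNX input name "input":

```bash
trtexec --onnx=fcos_ori.onnx \
        --minShapes=input:1x3x256x256 \
        --optShapes=input:1x3x512x512 \
        --maxShapes=input:10x3x1024x1024 \
        --saveEngine=fcos_dynamic.trt
```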

Demo3

https://www.freesion.com/article/2565433278/

Exporting pth to TensorRT
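
One direct route from a .pth model to a TensorRT engine, bypassing ONNX entirely, is NVIDIA's torch2trt wrapper. A minimal sketch, assuming torch2trt is installed and the model only uses layers it supports (resnet18 here is just a stand-in):

```python
import torch
from torch2trt import torch2trt
from torchvision.models import resnet18  # stand-in model for illustration

model = resnet18(pretrained=True).eval().cuda()
x = torch.randn(1, 3, 224, 224).cuda()

# Build a TensorRT engine directly from the PyTorch module
model_trt = torch2trt(model, [x])
y_trt = model_trt(x)

# model_trt is a torch.nn.Module, so its engine can be saved as a state dict
torch.save(model_trt.state_dict(), 'resnet18_trt.pth')
```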


Exporting ONNX to TensorRT

Scenario 1

PyTorch 1.3, TensorRT 6.0, ONNX 1.5

A CRNN model trained in PyTorch exports to ONNX, but the ONNX model then fails to convert to TensorRT.

Cause:

https://github.com/onnx/onnx-tensorrt/blob/master/operators.md

↑ The operator support list for onnx-tensorrt simply does not include RNNs; LSTM, GRU, and the like are all unsupported.

Solution:
https://s0docs0nvidia0com.icopy.site/deeplearning/sdk/tensorrt-developer-guide/index.html#create_network_python

https://s0docs0nvidia0com.icopy.site/deeplearning/sdk/tensorrt-api/python_api/infer/Graph/Network.html

Rebuild the network with the TensorRT network-definition API and load the trained weights into it.
Source: https://blog.csdn.net/tsukumo99/article/details/103498390
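A skeleton of that approach using the TensorRT Python API; the layer sequence, shapes, and placeholder weights below are illustrative, not the actual CRNN:

```python
import numpy as np
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

# Declare the input (shape is illustrative)
inp = network.add_input('input', trt.float32, (1, 1, 32, 100))

# Add layers one by one, feeding them the trained weights exported from
# the PyTorch state_dict as numpy arrays (zeros here as placeholders)
w = np.zeros((64, 1, 3, 3), dtype=np.float32)
b = np.zeros((64,), dtype=np.float32)
conv = network.add_convolution_nd(inp, 64, (3, 3), trt.Weights(w), trt.Weights(b))
conv.padding_nd = (1, 1)

# ... continue with the recurrent layers, then mark the final output
network.mark_output(conv.get_output(0))
```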

Converting pth models between different PyTorch versions

https://blog.csdn.net/u010454261/article/details/114936724
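The usual trick is to load the checkpoint with the newer PyTorch and re-save it in the legacy (non-zipfile) serialization format so an older PyTorch can read it; a sketch, with hypothetical file names:

```python
import torch

# Load with the newer PyTorch (>= 1.6), then re-save in the legacy format
state = torch.load('model_new.pth', map_location='cpu')
torch.save(state, 'model_legacy.pth', _use_new_zipfile_serialization=False)
```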

References

[TensorRT Series] 1. TensorRT installation tutorial

[TensorRT Series] 2. ONNX-TensorRT installation tutorial

[TensorRT Series] 3. An example: PyTorch -> ONNX -> TensorRT