onnx安装

pip install onnx
pip install onnxruntime-gpu==1.16

onnxruntime-gpu（ort）的版本需要与本机的 CUDA 版本对应：我的环境是 CUDA 11.8，因此这里安装 1.16 版本的 onnxruntime-gpu。

转换

def pytorch2onnx(task_type, onnx_model_path):
    """Build the model configured for ``task_type`` and export it to ONNX.

    The image size and model type are read from the training configuration
    returned by ``get_train_conf(task_type)``; the model is moved to the GPU
    and handed to ``_pytorch2onnx`` for the actual export.

    Args:
        task_type: Key used to look up the training config and the model.
        onnx_model_path: Destination path of the exported ``.onnx`` file.
    """
    conf = get_train_conf(task_type)
    # Read both settings up front, in the same order as before, so a missing
    # key fails before any model is constructed.
    img_size, model_type = conf['img_size'], conf['model_type']

    net = get_model(task_type=task_type, model_type=model_type).cuda()
    _pytorch2onnx(net, onnx_model_path, img_size)

# This is the key helper: given a torch model, run one pass with a dummy input
# of the expected shape so the exporter can record the computation graph.
def _pytorch2onnx(model:nn.Module, onnx_model_path,img_size=1024):
    """Export ``model`` to an ONNX file at ``onnx_model_path``.

    The model is moved to the GPU and exported with a single dummy batch of
    shape ``(1, 3, img_size, img_size)``. The graph input is named ``'input'``
    and the output ``'output'``; axis 0 of both is declared dynamic so the
    exported model accepts any batch size.

    Args:
        model: The torch module to export.
        onnx_model_path: Destination path of the exported ``.onnx`` file.
        img_size: Height and width of the square dummy input image.
    """
    model = model.cuda()
    # torch.FloatTensor(sizes) returns *uninitialised* memory, which may hold
    # NaN/Inf garbage and makes the export run non-reproducible. Use a
    # well-defined float32 random tensor of the same shape instead (and avoid
    # shadowing the builtin ``input``).
    dummy_input = torch.randn(1, 3, img_size, img_size,
                              dtype=torch.float32, device='cuda')
    torch.onnx.export(
        model,
        dummy_input,
        onnx_model_path,
        # opset_version is left at the exporter default; pass e.g.
        # opset_version=16 here to pin it.
        # Fold constants so that the exported model file is smaller.
        do_constant_folding=True,
        # Name the inputs/outputs. This matters for models with several of
        # them (BERT, for example, has two inputs); the ResNet exported here
        # has a single input and a single output.
        input_names=['input'],
        output_names=['output'],
        # Mark axis 0 of the input and output as the batch dimension, i.e.
        # the dimension whose length may vary.
        dynamic_axes={
            'input': {0: 'batch_size'},
            'output': {0: 'batch_size'},
        },
    )
                      
def export(task_type='merge'):
    """Export the model for ``task_type`` to ONNX and validate the result.

    The file is written to ``{script_path}/model/{task_type}/resnet.onnx``.
    After exporting, the file is loaded back and checked with
    ``onnx.checker.check_model``.

    Args:
        task_type: ``'merge'`` to export the merged model, otherwise one of
            ``ORDERED_TASK_TYPES`` to export that task's ``resnet50`` model.

    Raises:
        AssertionError: If ``task_type`` is neither ``'merge'`` nor listed in
            ``ORDERED_TASK_TYPES``.
    """
    # Validate before touching the filesystem so that an unknown task type
    # does not leave an empty output directory behind.
    if task_type != 'merge':
        assert task_type in ORDERED_TASK_TYPES, f'unknown task_type: {task_type!r}'

    onnx_model_dir = f'{script_path}/model/{task_type}'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(onnx_model_dir, exist_ok=True)

    if task_type == 'merge':
        model = get_merge_model().cuda()
    else:
        model = get_model(task_type=task_type, model_type='resnet50').cuda()

    onnx_model_path = f'{onnx_model_dir}/resnet.onnx'
    _pytorch2onnx(model, onnx_model_path=onnx_model_path)

    # Reload the exported file and let the ONNX checker verify it.
    onnx_model = onnx.load(onnx_model_path)
    onnx.checker.check_model(onnx_model)

推理

# Passing {"cudnn_conv_algo_search": "DEFAULT"} to the CUDA provider is meant
# to make convolution operations faster.
def get_session(model_path, device='cuda'):
    """Create an ONNX Runtime inference session for ``model_path``.

    Args:
        model_path: Path of the ``.onnx`` model to load.
        device: ``'cuda'`` to request the CUDA execution provider (with the
            CPU provider listed after it); any other value uses the CPU
            provider only.

    Returns:
        The created ``onnxruntime.InferenceSession``.
    """
    if device == 'cuda':
        providers = [
            ("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"}),
            'CPUExecutionProvider',
        ]
    else:
        providers = ['CPUExecutionProvider']

    # Profiling output can be switched on through the session options; it is
    # turned off here.
    session_options = onnxruntime.SessionOptions()
    session_options.enable_profiling = False

    return onnxruntime.InferenceSession(model_path, session_options, providers=providers)

# The export step named the graph input 'input' and the output 'output', so
# the tensors can be fed and fetched by exactly those names here.
def onnx_infer_by_session(session: InferenceSession, input: Tensor):
    """Run one inference pass through ``session`` and return a torch tensor.

    Args:
        session: Session whose model has an input named ``'input'`` and an
            output named ``'output'``.
        input: Input batch as a torch tensor. It may live on any device and
            may require grad; it is detached and copied to host memory
            before being handed to the session.

    Returns:
        The ``'output'`` array of the model wrapped as a (CPU) torch tensor.
    """
    # Tensor.numpy() raises for CUDA tensors and for tensors that require
    # grad, so detach and move to the CPU first. For a plain CPU tensor both
    # calls are no-ops that return the same data.
    feed = input.detach().cpu().numpy()
    outputs = session.run(['output'], {'input': feed})
    pred = torch.from_numpy(outputs[0])
    # Kept from the original: force a garbage collection after every call
    # (presumably to release intermediate buffers promptly).
    gc.collect()
    return pred


def onnx_infer_by_path(onnx_model_path, input: Tensor, device='cuda'):
    """Load the ONNX model at ``onnx_model_path`` and run it once on ``input``.

    A new session is created through ``get_session`` on every call, then the
    inference is delegated to ``onnx_infer_by_session``.

    Args:
        onnx_model_path: Path of the ``.onnx`` model to load.
        input: Input batch as a torch tensor.
        device: Forwarded to ``get_session`` to choose the execution provider.

    Returns:
        Whatever ``onnx_infer_by_session`` returns for this session and input.
    """
    return onnx_infer_by_session(get_session(onnx_model_path, device), input)

目录CONTENT

Pytorch模型转ONNX模型

onnx安装

转换

推理

评论区