When a rookie like me runs open-source deep learning code, the result is a pth model file, and inference is usually done with the project's predict.py. Deploying the pth file directly has a drawback: the model has to be rebuilt and reloaded every single time, and it is hard to port to other environments. So I wanted to deploy with ONNX instead, and this post records the process.
pth2onnx
import torch
from ptsemseg.models import get_model
from ptsemseg.utils import convert_state_dict

# Set the device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Names for the input_names and output_names arguments of torch.onnx.export
input_names = ["input"]
output_names = ["output"]

# Build the model
model = get_model('pspnet', 150, version='ade20k')
# Load the trained weights before exporting (otherwise the onnx file contains random
# weights); adjust this line to match how predict.py loads the checkpoint
state = convert_state_dict(torch.load("../../pspnet_50_ade20k.pth", map_location=device))
model.load_state_dict(state)
model = model.eval().to(device)

# Construct a dummy input image tensor x (a batch of one 3x256x256 image)
x = torch.randn(1, 3, 256, 256).to(device)

# Export to ONNX
torch.onnx.export(model, x, "pspnet_256.onnx", verbose=True,
                  input_names=input_names, output_names=output_names, opset_version=11)
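Before writing the prediction script, it is worth sanity-checking the exported file: onnx.checker validates the graph, and running a dummy input through onnxruntime should give scores close to what the PyTorch model produces. Below is a minimal sketch, assuming the file was exported as pspnet_256.onnx above; the tolerances in the commented-out comparison are only a rough guess.

import numpy as np
import onnx
import onnxruntime as ort

# Structural check of the exported graph
onnx.checker.check_model(onnx.load("pspnet_256.onnx"))

# Run a dummy input through ONNX Runtime
sess = ort.InferenceSession("pspnet_256.onnx")
dummy = np.random.randn(1, 3, 256, 256).astype(np.float32)
ort_out = sess.run(["output"], {"input": dummy})[0]
print(ort_out.shape)  # class scores, one channel per ADE20K class

# If the PyTorch model from the export script is still in scope, the two outputs
# can be compared; small numeric drift between backends is normal:
# torch_out = model(torch.from_numpy(dummy).to(device)).detach().cpu().numpy()
# np.testing.assert_allclose(torch_out, ort_out, rtol=1e-2, atol=1e-4)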
preonnx
Following the predict.py that runs inference with the pth file, I created a new preonnx.py that does the same inference from the onnx file.
The core of both files is the conversion between tensor and numpy data types and the reordering of dimensions.
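As a minimal sketch of that difference (this is not the full preonnx.py; the file name pspnet_256.onnx and the helper name predict_onnx are placeholders for illustration): ONNX Runtime consumes numpy arrays directly, so the torch tensor round trip disappears and only the HWC -> NCHW reshaping remains.

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("pspnet_256.onnx")

def predict_onnx(img):
    """img: float32 numpy array, HWC, already resized and mean-subtracted."""
    # HWC -> CHW plus a batch dimension; no torch.tensor()/unsqueeze(0) needed
    x = img.transpose(2, 0, 1)[np.newaxis].astype(np.float32)
    out = sess.run(["output"], {"input": x})[0]  # numpy array of class scores
    # argmax over the class axis replaces tensor.max(1) from the pth version
    return out.argmax(axis=1)[0]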
Prediction with the pth file
import numpy as np
from skimage.transform import resize
import cv2
import torch
import time
import onnx
import onnxruntime as ort
from ptsemseg.models import get_model
from ptsemseg.utils import convert_state_dict
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
weight_path = "../../pspnet_50_ade20k.pth"
def color_map(N=256, normalized=False):
    """
    Return Color Map in PASCAL VOC format (rgb)
    \param N (int) number of classes
    \param normalized (bool) whether colors are normalized (float 0-1)
    \return (Nx3 numpy array) a color map
    """
    def bitget(byteval, idx):
        return ((byteval & (1 << idx)) != 0)
    dtype = 'float32' if normalized else 'uint8'
    cmap = np.zeros((N, 3), dtype=dtype)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            r = r | (bitget(c, 0) << 7-j)
            g = g | (bitget(c, 1) << 7-j)
            b = b | (bitget(c, 2) << 7-j)
            c = c >> 3
        cmap[i] = np.array([r, g, b])
    cmap = cmap/255.0 if normalized else cmap
    return cmap
def decode_segmap(temp, n_classes, cmap):
    """
    Given an image of class predictions, produce a bgr8 image with class colors
    \param temp (2d numpy int array) input image with semantic classes (as integer)
    \param n_classes (int) number of classes
    \param cmap (Nx3 numpy array) input color map
    \return (numpy array bgr8) the decoded image with class colors
    """
    r = temp.copy()
    g = temp.copy()
    b = temp.copy()
    for l in range(0, n_classes):
        r[temp == l] = cmap[l, 0]
        g[temp == l] = cmap[l, 1]
        b[temp == l] = cmap[l, 2]
    bgr = np.zeros((temp.shape[0], temp.shape[1], 3))
    bgr[:, :, 0] = b
    bgr[:, :, 1] = g
    bgr[:, :, 2] = r
    return bgr.astype(np.uint8)
def predict_max(img):
    """
    Do semantic prediction for max fusion
    \param img (numpy array rgb8)
    """
    class_probs = predict(img)
    # Take best prediction and confidence
    pred_confidence, pred_label = class_probs.max(1)
    pred_confidence = pred_confidence.squeeze(0).cpu().numpy()
    pred_label = pred_label.squeeze(0).cpu().numpy()
    pred_label = resize(pred_label, (375, 500), order = 0, mode = 'reflect', anti_aliasing=False, preserve_range = True) # order = 0, nearest neighbour
    pred_label = pred_label.astype(np.int64)
    # Add semantic color
    semantic_color = decode_segmap(pred_label, 150, cmap)
    pred_confidence = resize(pred_confidence, (375, 500), mode = 'reflect', anti_aliasing=True, preserve_range = True)
    return (semantic_color, pred_confidence)
def predict(img):
    """
    Do semantic segmentation
    \param img: (numpy array bgr8) The input cv image
    """
    img = img.copy() # Make a copy of image because the method will modify the image
    #orig_size = (img.shape[0], img.shape[1]) # Original image size
    # Prepare image: first resize to the CNN input size, then subtract the mean value of the SUNRGBD dataset. No normalization
    img = resize(img, cnn_input_size, mode = 'reflect', anti_aliasing=True, preserve_range = True) # Gives float64
    img = img.astype(np.float32)
    img -= mean
    # Convert HWC -> CHW
    img = img.transpose(2, 0, 1)
    # Convert to tensor
    img = torch.tensor(img, dtype = torch.float32)
    img = img.unsqueeze(0) # Add batch dimension required by CNN
    with torch.n