有人遇到过yolov8n.pt模型转torchscripts和onnx，在triton server或Deepytorch Inference上推理，精度下降的问题吗？ #7792

JackonLiu · 2024-11-14T02:08:21Z

Description
yolov8n.pt模型转torchscripts和onnx，在triton server或Deepytorch Inference上推理，精度下降

Triton Information
What version of Triton are you using?
nvcr.io/nvidia/tritonserver:23.04-py3
Are you using the Triton container or did you build it yourself?
no
To Reproduce
Steps to reproduce the behavior.

yolo export model=best.pt format=onnx opset=15
yolo export model=best.pt format=torchscripts
nano config.pbtxt
docker run --gpus all -it --rm -v /data/triton/models:/models -p 8000:8000 -p 8001:8001 -p 8002:8002 nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models --log-verbose=1
python yolov10_triton.py
output:
boxes: tensor([[ 7.4102, 36.6875, 14.7109, 73.1250], [ 15.0469, 26.4531, 29.8750, 52.8438], [ 20.6875, 15.2891, 40.5000, 30.6094], ..., [472.5000, 575.0000, 348.7500, 143.5000], [503.0000, 568.0000, 326.0000, 178.0000], [545.0000, 586.5000, 323.2500, 224.0000]], device='cuda:0') scores: tensor([0., 0., 0., ..., 0., 0., 0.], device='cuda:0') class_ids: tensor([0, 0, 0, ..., 0, 0, 0], device='cuda:0') class_ids>0.7: tensor([], device='cuda:0', dtype=torch.int64)

_ @why this class_ids is 0?_

Describe the models (framework, inputs, outputs), ideally include the model configuration file (if using an ensemble include the model configuration file for that as well).

platform: "onnxruntime_onnx"
max_batch_size: 16
input [
{
name: "images"
data_type: TYPE_FP32
dims: [3, 640, 640 ]
}
]
output [
{
name: "output0"
data_type: TYPE_FP32
dims: [ -1,-1 ]
}
]
dynamic_batching {
preferred_batch_size: [1, 2, 4, 8, 16]
max_queue_delay_microseconds: 100
}
instance_group [
{
count: 1
kind: KIND_GPU
gpus: [0]
}
]

name: "bottle_plus"
platform: "pytorch_libtorch"
max_batch_size: 16
input [
{
name: "images__0"
data_type: TYPE_FP32
dims: [3, 640, 640 ]
}
]
output [
{
name: "output__0"
data_type: TYPE_FP32
dims: [ 77, 8400 ]
}
]
dynamic_batching {
preferred_batch_size: [1, 2, 4, 8, 16]
max_queue_delay_microseconds: 100
}
instance_group [
{
count: 1
kind: KIND_GPU
gpus: [0]
}
]

yolov10_triton.py
`import io
import os
from io import BytesIO

import cv2
import torch
import torchvision
from PIL import Image
from tritonclient.grpc import InferenceServerClient, InferInput
from tritonclient.utils import *

def getIou(box1, box2, inter_area):
box1_area = box1[2] * box1[3]
box2_area = box2[2] * box2[3]
union = box1_area + box2_area - inter_area
iou = inter_area / union
return iou

def getInter(box1, box2):
box1_x1, box1_y1, box1_x2, box1_y2 = box1[0] - box1[2] / 2, box1[1] - box1[3] / 2,
box1[0] + box1[2] / 2, box1[1] + box1[3] / 2
box2_x1, box2_y1, box2_x2, box2_y2 = box2[0] - box2[2] / 2, box2[1] - box1[3] / 2,
box2[0] + box2[2] / 2, box2[1] + box2[3] / 2
if box1_x1 > box2_x2 or box1_x2 < box2_x1:
return 0
if box1_y1 > box2_y2 or box1_y2 < box2_y1:
return 0
x_list = [box1_x1, box1_x2, box2_x1, box2_x2]
x_list = np.sort(x_list)
x_inter = x_list[2] - x_list[1]
y_list = [box1_y1, box1_y2, box2_y1, box2_y2]
y_list = np.sort(y_list)
y_inter = y_list[2] - y_list[1]
inter = x_inter * y_inter
return inter

class TritonInferer:
def init(self, model_name, server_url='localhost:8001'):
self.img_src = None
self.img = None
self.triton_client = InferenceServerClient(url=server_url, verbose=False)
self.model_name = model_name
self.input_name = 'images'
self.output_name = 'output0'
self.image_size = 640
self.conf_thres = 0.5
self.iou_thres = 0.5

def preprocess_image(self, payload):
    stream = BytesIO(payload)
    file_bytes = np.asarray(bytearray(stream.read()), dtype=np.uint8)
    img_src = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
    stream.close()
    self.img_src = img_src
    img_size = (self.image_size, self.image_size)
    image = cv2.resize(img_src, img_size)
    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    image = image.astype(np.float32) / 255.0  # Normalize to [0, 1]
    self.img = image
    return image, img_src

def rescale(self, ori_shape, boxes, target_shape):
    """Rescale the output to the original image shape"""
    ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
    padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2

    boxes[:, [0, 2]] -= padding[0]
    boxes[:, [1, 3]] -= padding[1]
    boxes[:, :4] /= ratio

    boxes[:, 0].clamp_(0, target_shape[1])  # x1
    boxes[:, 1].clamp_(0, target_shape[0])  # y1
    boxes[:, 2].clamp_(0, target_shape[1])  # x2
    boxes[:, 3].clamp_(0, target_shape[0])  # y2

    return boxes

def predict(self, payload):
    image, original_shape = self.preprocess_image(payload)
    image_input = InferInput(self.input_name, [1, 3, self.image_size, self.image_size], "FP32")
    image_input.set_data_from_numpy(image[np.newaxis, ...])

    results = self.triton_client.infer(
        model_name=self.model_name,
        inputs=[image_input]
    )
    output = results.as_numpy(self.output_name)

    # Here we assume it's a dictionary with 'boxes', 'confidences', and 'class_ids'
    self.parse_output(output)


def non_max_suppression(self, boxes, scores, iou_thres):

    indices = torchvision.ops.nms(torch.tensor(boxes), torch.tensor(scores), iou_thres)
    return boxes[indices], scores[indices]

def parse_output(self, output):

    num_classes = 77  # YOLOv8n 的类别数
    # num_classes = 84  # YOLOv8n 的类别数
    if output.shape == (1, num_classes, 8400):
        output = output.squeeze(0).transpose(1, 0)  # 转换为 (8400, num_classes)
    else:
        raise ValueError(f"Unexpected output shape: {output.shape}")

    boxes = output[:, :4]  # (8400, 4)
    scores = output[:, 4]  # (8400,)
    if np.any(scores > 0):

        max_score = np.max(scores)
        min_score = np.min(scores)

        new_min = 0
        new_max = 1.0


        scaled_scores = (scores - min_score) / (max_score - min_score) * (new_max - new_min) + new_min
        max_score = np.max(scaled_scores)
        min_score = np.min(scaled_scores)
        median_score = np.median(scaled_scores)
        mean_score = np.mean(scaled_scores)
        print(
            f"Scores - Max: {max_score:.4f}, Min: {min_score:.4f}, Median: {median_score:.4f}, Mean: {mean_score:.4f}")

        class_probs = output[:, 5:]  # (8400, 80)

        # 将 NumPy 数组转换为 PyTorch 张量
        output_tensor = torch.from_numpy(class_probs)

        # 使用 argmax
        class_indices = torch.argmax(output_tensor, dim=1)  # 获取每个锚点的最大类别的索引
        print("class_ids:", class_indices)

        # 过滤掉低置信度的检测结果
        confidence_threshold = 0.7
        mask = scaled_scores > confidence_threshold
        boxes = boxes[mask]
        scores = scaled_scores[mask]
        class_ids = class_indices[mask]
        # Apply Non-Maximum Suppression
        boxes1, confidences = self.non_max_suppression(boxes, scores, 0.45)
   

        print("class_ids>0.7:", class_ids)
        boxes_xyxy = torch.from_numpy(boxes1)
        boxes_xyxy_rescale = self.rescale(self.img.shape[1:], boxes_xyxy, self.img_src.shape).round()
        prediction_list = []
        class_ids = class_ids.cpu().numpy()
        boxes_xyxy_rescale = boxes_xyxy_rescale.cpu().numpy()
        for n in range(boxes.shape[0]):
            xy = boxes_xyxy_rescale[n]
            c = class_ids[n]
            # p = scores[n]
            p = confidences[n]
            Location = {'Left': int(xy[0]), 'Top': int(xy[1]), 'Weight': int(xy[2]),
                        'Height': int(xy[3])}
            i = {'Layer': c, 'Probablity': round(p, 4), 'Location': Location}
            prediction = [i['Layer'], i['Probablity'], i['Location']['Left'], i['Location']['Top'],
                          i['Location']['Weight'], i['Location']['Height']]
            print(prediction)
            prediction_list.append(prediction)

        return prediction_list

bottle_name = 'v10_bottle5_240925'
bottle_plus_name = 'v10_bottle_plus11_240925'
yolov8_wt = str(r"D:\workspace_py\pxys-model-rest\weights\bottle{}.pt".format(bottle_name))
yolov8_plus_wt = str(r"D:\workspace_py\pxys-model-rest\weights\bottle{}.pt".format(bottle_plus_name))

triton_inferer = TritonInferer(model_name='bottle_plus_onnx')

image_folder = r'E:\data\数据库\9月瓶\新品由柑汁\obj_train_data\images'

for filename in os.listdir(image_folder):
if filename.endswith('.jpg') or filename.endswith('.png'):
image_path = os.path.join(image_folder, filename)

    with Image.open(image_path) as img:

        img = img.convert('RGB')


        byte_arr = io.BytesIO()


        img.save(byte_arr, format='PNG')


        payload = byte_arr.getvalue()
    predictions = triton_inferer.predict(payload)
    print(predictions)

`
Expected behavior
A clear and concise description of what you expected to happen.
expected not 0

The text was updated successfully, but these errors were encountered:

wenshinlee · 2024-11-29T11:06:50Z

老哥啊，我也和你有同样的问题阿，我这用MMdeploy部署准确率都是正常的，转换成onnx或者pth直接部署，准确率都20%一下的

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

有人遇到过yolov8n.pt模型转torchscripts和onnx，在triton server或Deepytorch Inference上推理，精度下降的问题吗？ #7792

有人遇到过yolov8n.pt模型转torchscripts和onnx，在triton server或Deepytorch Inference上推理，精度下降的问题吗？ #7792

JackonLiu commented Nov 14, 2024 •

edited

Loading

wenshinlee commented Nov 29, 2024

有人遇到过yolov8n.pt模型转torchscripts和onnx，在triton server或Deepytorch Inference上推理，精度下降的问题吗？ #7792

有人遇到过yolov8n.pt模型转torchscripts和onnx，在triton server或Deepytorch Inference上推理，精度下降的问题吗？ #7792

Comments

JackonLiu commented Nov 14, 2024 • edited Loading

wenshinlee commented Nov 29, 2024

JackonLiu commented Nov 14, 2024 •

edited

Loading