Python-Export-Onnx

from ultralytics import YOLO
model = YOLO('yolo11n.pt')
#model = YOLO('yolo12n.pt')
model.train(data="coco128.yaml", epochs=4, batch=2, imgsz=640)
model.val(data="coco128.yaml", imgsz=640, batch=2, conf=0.25, iou=0.6)
model.predict("bus.jpg")
model.export(format="onnx")

Python-Onnxruntime

import cv2
import numpy as np
import onnxruntime as ort
def readClassesNames(file_path):
    with open(file_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names
classes_names = 'coco.names'
classes = readClassesNames(classes_names)
image = cv2.imread('bus.jpg')
image_height, image_width = image.shape[:2]
model_path = 'yolo11n.onnx'
start_time = cv2.getTickCount()
session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
conf_thresold = 0.45
iou_threshold = 0.25
model_inputs = session.get_inputs()
input_names = [model_inputs[i].name for i in range(len(model_inputs))]
input_shape = model_inputs[0].shape
model_output = session.get_outputs()
output_names = [model_output[i].name for i in range(len(model_output))]
input_height, input_width = input_shape[2:]
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
resized = cv2.resize(image_rgb, (input_width, input_height))
input_image = resized / 255.0
input_image = input_image.transpose(2,0,1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
outputs = session.run(output_names, {input_names[0]: input_tensor})[0]
predictions = np.squeeze(outputs).T
scores = np.max(predictions[:, 4:], axis=1)
predictions = predictions[scores > conf_thresold, :]
scores = scores[scores > conf_thresold]
class_ids = np.argmax(predictions[:, 4:], axis=1)
boxes = predictions[:, :4]
input_shape = np.array([input_width, input_height, input_width, input_height])
boxes = np.divide(boxes, input_shape, dtype=np.float32)
boxes *= np.array([image_width, image_height, image_width, image_height])
boxes = boxes.astype(np.int32)
indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_thresold, nms_threshold=iou_threshold)
detections = []
def xywh2xyxy(x):
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y
for (bbox, score, label) in zip(xywh2xyxy(boxes[indices]), scores[indices], class_ids[indices]):
    bbox = bbox.round().astype(np.int32).tolist()
    cls_id = int(label)
    cls = classes[cls_id]
    cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0,0,255), 2, 8)
    cv2.rectangle(image, (bbox[0], (bbox[1]-20)), (bbox[2], bbox[1]), (0,255,255), -1)
    cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
                cv2.FONT_HERSHEY_PLAIN,2, [225, 0, 0], thickness=2)
end_time = cv2.getTickCount()
t = (end_time - start_time)/cv2.getTickFrequency()
fps = 1/t
print(f"EStimated FPS: {fps:.2f}")
cv2.putText(image, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8);
cv2.imshow("YOLO11-ONNXRUNTIME", image)
cv2.waitKey(0)

Cpp-Onnxruntime

#include <fstream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
using namespace std;
using namespace cv;
using namespace Ort;
vector<string> readClassNames(const string& filename) {
    vector<string> classNames;
    ifstream file(filename);
    if (!file.is_open()) {
        cerr << "Error opening file: " << filename << endl;
        return classNames;
    }
    string line;
    while (getline(file, line)) {
        if (!line.empty()) {
            classNames.push_back(line);
        }
    }
    file.close();
    return classNames;
}
int main(int argc, char** argv)
{
    string filename = "coco.names";
    vector<string> labels = readClassNames(filename);
    Mat image = imread("bus.jpg");
    int ih = image.rows;
    int iw = image.cols;
    string onnxpath = "yolo11n.onnx";
    wstring modelPath = wstring(onnxpath.begin(), onnxpath.end());
    SessionOptions session_options;
    Env env = Env(ORT_LOGGING_LEVEL_ERROR, "yolo11n");
    session_options.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    Session session_(env, modelPath.c_str(), session_options);
    vector<string> input_node_names;
    vector<string> output_node_names;
    size_t numInputNodes = session_.GetInputCount();
    size_t numOutputNodes = session_.GetOutputCount();
    AllocatorWithDefaultOptions allocator;
    input_node_names.reserve(numInputNodes);
    int input_w = 0;
    int input_h = 0;
    for (int i = 0; i < numInputNodes; i++) {
        auto input_name = session_.GetInputNameAllocated(i, allocator);
        input_node_names.push_back(input_name.get());
        TypeInfo input_type_info = session_.GetInputTypeInfo(i);
        auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
        auto input_dims = input_tensor_info.GetShape();
        input_w = input_dims[3];
        input_h = input_dims[2];
        cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << endl;
    }
    int output_h = 0;
    int output_w = 0;
    TypeInfo output_type_info = session_.GetOutputTypeInfo(0);
    auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
    auto output_dims = output_tensor_info.GetShape();
    output_h = output_dims[1]; 
    output_w = output_dims[2]; 
    cout << "output format : HxW = " << output_dims[1] << "x" << output_dims[2] << endl;
    for (int i = 0; i < numOutputNodes; i++) {
        auto out_name = session_.GetOutputNameAllocated(i, allocator);
        output_node_names.push_back(out_name.get());
    }
    cout << "input: " << input_node_names[0] << " output: " << output_node_names[0] << endl;
    int64 start = getTickCount();
    int w = image.cols;
    int h = image.rows;
    int _max = max(h, w);
    Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
    Rect roi(0, 0, w, h);
    image.copyTo(image_(roi));
    float x_factor = image_.cols / static_cast<float>(input_w);
    float y_factor = image_.rows / static_cast<float>(input_h);
    Mat blob = dnn::blobFromImage(image_, 1 / 255.0, Size(input_w, input_h), Scalar(0, 0, 0), true, false);
    size_t tpixels = input_h * input_w * 3;
    array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };
    auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Value input_tensor_ = Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
    const array<const char*, 1> inputNames = { input_node_names[0].c_str() };
    const array<const char*, 1> outNames = { output_node_names[0].c_str() };
    vector<Value> ort_outputs;
    try {
        ort_outputs = session_.Run(RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
    }
    catch (exception e) {
        cout << e.what() << endl;
    }
    const float* pdata = ort_outputs[0].GetTensorMutableData<float>();
    Mat dout(output_h, output_w, CV_32F, (float*)pdata);
    Mat det_output = dout.t();
    session_options.release();
    session_.release();
    vector<Rect> boxes;
    vector<int> classIds;
    vector<float> confidences;
    for (int i = 0; i < det_output.rows; i++) {
        Mat classes_scores = det_output.row(i).colRange(4, 84);
        Point classIdPoint;
        double score;
        minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
        if (score > 0.25)
        {
            float cx = det_output.at<float>(i, 0);
            float cy = det_output.at<float>(i, 1);
            float ow = det_output.at<float>(i, 2);
            float oh = det_output.at<float>(i, 3);
            int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            int width = static_cast<int>(ow * x_factor);
            int height = static_cast<int>(oh * y_factor);
            Rect box;
            box.x = x;
            box.y = y;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(score);
        }
    }
    vector<int> indexes;
    dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
    for (size_t i = 0; i < indexes.size(); i++) {
        int index = indexes[i];
        int idx = classIds[index];
        rectangle(image, boxes[index], Scalar(0, 0, 255), 2, 8);
        rectangle(image, Point(boxes[index].tl().x, boxes[index].tl().y - 20),
            Point(boxes[index].br().x, boxes[index].tl().y), Scalar(0, 255, 255), -1);
        putText(image, labels[idx], Point(boxes[index].tl().x, boxes[index].tl().y), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    }
    float t = (getTickCount() - start) / static_cast<float>(getTickFrequency());
    putText(image, format("FPS: %.2f", 1 / t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    imshow("YOLO11-ONNXRUNTIME", image);
    waitKey(0);
    return 0;
}

Python-Openvino

import cv2
import numpy as np
from openvino.runtime import Core
def readClassesNames(file_path):
    with open(file_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names
classes_names = 'coco.names'
classes = readClassesNames(classes_names)
image = cv2.imread('bus.jpg')
image_height, image_width = image.shape[:2]
model_path = 'yolo11n.onnx'
start_time = cv2.getTickCount()
core = Core()
model = core.compile_model(model_path, 'CPU')
input_shape = model.inputs[0].shape
output_shape = model.outputs[0].shape
conf_thresold = 0.25
iou_threshold = 0.45
score_thresold = 0.25
input_height, input_width = input_shape[2:]
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
resized = cv2.resize(image_rgb, (input_width, input_height))
input_image = resized / 255.0
input_image = input_image.transpose(2,0,1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
outputs = model([input_tensor])[model.outputs[0]]
predictions = np.squeeze(outputs).T
scores = np.max(predictions[:, 4:], axis=1)
predictions = predictions[scores > score_thresold, :]
scores = scores[scores > score_thresold]
class_ids = np.argmax(predictions[:, 4:], axis=1)
boxes = predictions[:, :4]
input_shape = np.array([input_width, input_height, input_width, input_height])
boxes = np.divide(boxes, input_shape, dtype=np.float32)
boxes *= np.array([image_width, image_height, image_width, image_height])
boxes = boxes.astype(np.int32)
indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_thresold, nms_threshold=iou_threshold)
detections = []
def xywh2xyxy(x):
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y
for (bbox, score, label) in zip(xywh2xyxy(boxes[indices]), scores[indices], class_ids[indices]):
    bbox = bbox.round().astype(np.int32).tolist()
    cls_id = int(label)
    cls = classes[cls_id]
    cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0,0,255), 2, 8)
    cv2.rectangle(image, (bbox[0], (bbox[1]-20)), (bbox[2], bbox[1]), (0,255,255), -1)
    cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
                cv2.FONT_HERSHEY_PLAIN,2, [225, 0, 0], thickness=2)
end_time = cv2.getTickCount()
t = (end_time - start_time) / cv2.getTickFrequency()
fps = 1 / t
print(f"EStimated FPS: {fps:.2f}")
cv2.putText(image, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8);
cv2.imshow("YOLO11-OPENVINO", image)
cv2.waitKey(0)

Cpp-Openvino

#include <fstream>
#include <opencv2/opencv.hpp>
#include <openvino/openvino.hpp> 
using namespace std;
using namespace cv;
using namespace ov;
vector<string> readClassNames(const string& filename) {
    vector<string> classNames;
    ifstream file(filename);
    if (!file.is_open()) {
        cerr << "Error opening file: " << filename << endl;
        return classNames;
    }
    string line;
    while (getline(file, line)) {
        if (!line.empty()) {
            classNames.push_back(line);
        }
    }
    file.close();
    return classNames;
}
int main(int argc, char** argv)
{
    string filename = "coco.names";
    vector<string> labels = readClassNames(filename);
    Mat image = imread("bus.jpg");
    int ih = image.rows;
    int iw = image.cols;
    string model_path = "yolo11n.onnx";
    float conf_threshold = 0.25;
    float nms_threshold = 0.4;
    float score_threshold = 0.25;
    Core core;
    auto compiled_model = core.compile_model(model_path, "CPU");
    auto input_port = compiled_model.input();
    auto input_shape = input_port.get_shape();
    InferRequest infer_request = compiled_model.create_infer_request();
    int64 start = getTickCount();
    int w = image.cols;
    int h = image.rows;
    int _max = max(h, w);
    Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
    Rect roi(0, 0, w, h);
    image.copyTo(image_(roi));
    int input_w = input_shape[2];
    int input_h = input_shape[3];
    float x_factor = image_.cols / static_cast<float>(input_w);
    float y_factor = image_.rows / static_cast<float>(input_h);
    Mat blob = dnn::blobFromImage(image, 1.0 / 255.0, Size(input_w, input_h), Scalar(), true);
    Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), blob.ptr(0));
    infer_request.set_input_tensor(input_tensor);
    infer_request.infer();
    auto output = infer_request.get_output_tensor(0);
    auto output_shape = output.get_shape();
    float* data = output.data<float>();
    Mat output_buffer(output_shape[1], output_shape[2], CV_32F, data);
    transpose(output_buffer, output_buffer);
    vector<Rect> boxes;
    vector<int> classIds;
    vector<float> confidences;
    for (int i = 0; i < output_buffer.rows; i++) {
        Mat classes_scores = output_buffer.row(i).colRange(4, 84);
        Point classIdPoint;
        double score;
        minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
        if (score > score_threshold)
        {
            float cx = output_buffer.at<float>(i, 0);
            float cy = output_buffer.at<float>(i, 1);
            float ow = output_buffer.at<float>(i, 2);
            float oh = output_buffer.at<float>(i, 3);
            int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            int width = static_cast<int>(ow * x_factor);
            int height = static_cast<int>(oh * y_factor);
            Rect box;
            box.x = x;
            box.y = y;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(score);
        }
    }
    vector<int> indexes;
    dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indexes);
    for (size_t i = 0; i < indexes.size(); i++) {
        int index = indexes[i];
        int idx = classIds[index];
        rectangle(image, boxes[index], Scalar(0, 0, 255), 2, 8);
        rectangle(image, Point(boxes[index].tl().x, boxes[index].tl().y - 20),
            Point(boxes[index].br().x, boxes[index].tl().y), Scalar(0, 255, 255), -1);
        putText(image, labels[idx], Point(boxes[index].tl().x, boxes[index].tl().y), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    }
    float t = (getTickCount() - start) / static_cast<float>(getTickFrequency());
    putText(image, format("FPS: %.2f", 1 / t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    imshow("YOLO11-OPENVINO", image);
    waitKey(0);
    return 0;
}

Python-Tensorrt

import os
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import cv2
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem
    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
    def __repr__(self):
        return self.__str__()
def onnx_to_engine(onnx_file_path, engine_file_path, precision_type=None):
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, TRT_LOGGER)
    with open(onnx_file_path, 'rb') as model:
        if not parser.parse(model.read()):
            print('ERROR: Failed to parse the ONNX file.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 30
    if precision_type == 'fp16':
        config.set_flag(trt.BuilderFlag.FP16)
    else:
        print('WARNING: FP32 is used by default.')
    profile = builder.create_optimization_profile()
    config.add_optimization_profile(profile)
    engine = builder.build_engine(network, config)
    with open(engine_file_path, 'wb') as f:
        f.write(engine.serialize())
def readClassesNames(file_path):
    with open(file_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names
conf_thresold = 0.25
iou_threshold = 0.45
score_thresold = 0.25
classes_names = 'coco.names'
onnx_path = 'yolo11n.onnx'
engine_path = 'yolo11n.engine'
classes = readClassesNames(classes_names)
image = cv2.imread("bus.jpg")
image_height, image_width = image.shape[:2]
TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
if os.path.exists(engine_path):
    with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    input_shape = engine.get_binding_shape(engine.get_binding_index('images'))
    input_width, input_height = input_shape[2:]
    resized = cv2.resize(image, (input_width, input_height))
    input_image = resized / 255.0
    input_image = input_image.transpose(2, 0, 1)
    input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
    start_time = cv2.getTickCount()
    inputs_alloc_buf = []
    outputs_alloc_buf = []
    bindings_alloc_buf = []
    stream_alloc_buf = cuda.Stream()
    context = engine.create_execution_context()
    data_type = []
    for binding in engine:
        if engine.binding_is_input(binding):
            size = input_tensor.shape[0] * input_tensor.shape[1] * input_tensor.shape[2] * input_tensor.shape[3]
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            data_type.append(dtype)
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings_alloc_buf.append(int(device_mem))
            inputs_alloc_buf.append(HostDeviceMem(host_mem, device_mem))
        else:
            size = trt.volume(engine.get_binding_shape(binding)[1:]) * engine.max_batch_size
            host_mem = cuda.pagelocked_empty(size, data_type[0])
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings_alloc_buf.append(int(device_mem))
            outputs_alloc_buf.append(HostDeviceMem(host_mem, device_mem))
    inputs_alloc_buf[0].host = input_tensor.reshape(-1)
    for inp in inputs_alloc_buf:
        cuda.memcpy_htod_async(inp.device, inp.host, stream_alloc_buf)
    context.set_binding_shape(0, input_tensor.shape)
    context.execute_async(batch_size=1, bindings=bindings_alloc_buf, stream_handle=stream_alloc_buf.handle)
    for out in outputs_alloc_buf:
        cuda.memcpy_dtoh_async(out.host, out.device, stream_alloc_buf)
    stream_alloc_buf.synchronize()
    net_output = [out.host for out in outputs_alloc_buf]
    outputs = net_output[0].reshape(84, 8400)
    predictions = np.squeeze(outputs).T
    print(predictions.shape)
    scores = np.max(predictions[:, 4:], axis=1)
    predictions = predictions[scores > score_thresold, :]
    scores = scores[scores > score_thresold]
    class_ids = np.argmax(predictions[:, 4:], axis=1)
    boxes = predictions[:, :4]
    input_shape = np.array([input_width, input_height, input_width, input_height])
    boxes = np.divide(boxes, input_shape, dtype=np.float32)
    boxes *= np.array([image_width, image_height, image_width, image_height])
    boxes = boxes.astype(np.int32)
    indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_thresold, nms_threshold=iou_threshold)
    detections = []
    def xywh2xyxy(x):
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y
    for (bbox, score, label) in zip(xywh2xyxy(boxes[indices]), scores[indices], class_ids[indices]):
        bbox = bbox.round().astype(np.int32).tolist()
        cls_id = int(label)
        cls = classes[cls_id]
        cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), 2, 8)
        cv2.rectangle(image, (bbox[0], (bbox[1] - 20)), (bbox[2], bbox[1]), (0, 255, 255), -1)
        cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
                    cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], thickness=2)
    end_time = cv2.getTickCount()
    t = (end_time - start_time) / cv2.getTickFrequency()
    fps = 1 / t
    print(f"EStimated FPS: {fps:.2f}")
    cv2.putText(image, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8);
    cv2.imshow("Python + Tensorrt + Yolo11 推理结果", image)
    cv2.waitKey(0)
else:
    onnx_to_engine(onnx_path, engine_path, 'fp16')

Cpp-Tensorrt

#include <windows.h>
#include <fstream>
#include <iostream>
#include <vector>
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <opencv2/opencv.hpp>
std::vector<std::string> read_class_names(std::string path_name)
{
	std::vector<std::string> class_names;
	std::ifstream infile;
	infile.open(path_name.data());
	assert(infile.is_open());
	std::string str;
	while (getline(infile, str)) {
		class_names.push_back(str);
		str.clear();
	}
	infile.close();
	return class_names;
}
class Logger : public nvinfer1::ILogger
{
	void log(Severity severity, const char* message)  noexcept
	{
		if (severity != Severity::kINFO)
			std::cout << message << std::endl;
	}
} gLogger;
void onnx_to_engine(std::string onnx_file_path, std::string engine_file_path, std::string type) {
	nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
	const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
	nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
	nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
	parser->parseFromFile(onnx_file_path.c_str(), 2);
	for (int i = 0; i < parser->getNbErrors(); ++i)
	{
		std::cout << "load error: " << parser->getError(i)->desc() << std::endl;
	}
	printf("tensorRT load mask onnx model successfully!!!...\n");
	nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
	config->setMaxWorkspaceSize(1 << 30);
	if (type == "fp16") {
		config->setFlag(nvinfer1::BuilderFlag::kFP16);
	}
	else {
		std::cout << "WARNING: FP32 is used by default." << std::endl;
	}
	nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
	std::cout << "try to save engine file now~~~" << std::endl;
	std::ofstream file_ptr(engine_file_path, std::ios::binary);
	if (!file_ptr) {
		std::cerr << "could not open plan output file" << std::endl;
		return;
	}
	nvinfer1::IHostMemory* model_stream = engine->serialize();
	file_ptr.write(reinterpret_cast<const char*>(model_stream->data()), model_stream->size());
	model_stream->destroy();
	engine->destroy();
	network->destroy();
	parser->destroy();
	std::cout << "convert onnx model to TensorRT engine model successfully!" << std::endl;
}
int main()
{
	const char* model_path_onnx = "yolo11n.onnx";
	const char* model_path_engine = "yolo11n.engine";
	const char* image_path = "bus.jpg";
	std::string label_path = "coco.names";
	const char* input_node_name = "images";
	const char* output_node_name = "output0";
	int num_ionode = 2;
	std::vector<std::string> class_names;
	float factor;
	std::ifstream f(model_path_engine);
	bool engine_file_exist = f.good();
	std::ifstream file_ptr(model_path_engine, std::ios::binary);
	if (engine_file_exist) {
		size_t size = 0;
		file_ptr.seekg(0, file_ptr.end);
		size = file_ptr.tellg();
		file_ptr.seekg(0, file_ptr.beg);
		char* model_stream = new char[size];
		file_ptr.read(model_stream, size);
		file_ptr.close();
		Logger logger;
		nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
		nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size);
		nvinfer1::IExecutionContext* context = engine->createExecutionContext();
		delete[] model_stream;
		void** data_buffer = new void* [num_ionode];
		int input_node_index = engine->getBindingIndex(input_node_name);
		nvinfer1::Dims input_node_dim = engine->getBindingDimensions(input_node_index);
		size_t input_data_length = input_node_dim.d[1] * input_node_dim.d[2] * input_node_dim.d[3];
		cudaMalloc(&(data_buffer[input_node_index]), input_data_length * sizeof(float));
		int output_node_index = engine->getBindingIndex(output_node_name);
		nvinfer1::Dims output_node_dim = engine->getBindingDimensions(output_node_index);
		size_t output_data_length = output_node_dim.d[1] * output_node_dim.d[2];
		cudaMalloc(&(data_buffer[output_node_index]), output_data_length * sizeof(float));
		cv::Mat image = cv::imread(image_path);
		int max_side_length = std::max(image.cols, image.rows);
		cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3);
		cv::Rect roi(0, 0, image.cols, image.rows);
		image.copyTo(max_image(roi));
		cv::Size input_node_shape(input_node_dim.d[2], input_node_dim.d[3]);
		int64 start = cv::getTickCount();
		cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false);
		cudaStream_t stream;
		cudaStreamCreate(&stream);
		std::vector<float> input_data(input_data_length);
		memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float));
		cudaMemcpyAsync(data_buffer[input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream);
		context->enqueueV2(data_buffer, stream, nullptr);
		float* result_array = new float[output_data_length];
		cudaMemcpyAsync(result_array, data_buffer[output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream);
		factor = max_side_length / (float)input_node_dim.d[2];
		class_names = read_class_names(label_path);
		cv::Mat det_output = cv::Mat(84, 8400, CV_32F, result_array);
		transpose(det_output, det_output);
		std::vector<cv::Rect> position_boxes;
		std::vector<int> classIds;
		std::vector<float> confidences;
		std::cout << det_output.rows << std::endl;
		for (int i = 0; i < det_output.rows; i++) {
			cv::Mat classes_scores = det_output.row(i).colRange(4, 84);
			cv::Point classIdPoint;
			double score;
			minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
			if (score > 0.25)
			{
				float cx = det_output.at<float>(i, 0);
				float cy = det_output.at<float>(i, 1);
				float ow = det_output.at<float>(i, 2);
				float oh = det_output.at<float>(i, 3);
				int x = static_cast<int>((cx - 0.5 * ow) * factor);
				int y = static_cast<int>((cy - 0.5 * oh) * factor);
				int width = static_cast<int>(ow * factor);
				int height = static_cast<int>(oh * factor);
				cv::Rect box;
				box.x = x;
				box.y = y;
				box.width = width;
				box.height = height;
				position_boxes.push_back(box);
				classIds.push_back(classIdPoint.x);
				confidences.push_back(score);
			}
		}
		std::vector<int> indexes;
		cv::dnn::NMSBoxes(position_boxes, confidences, 0.25, 0.45, indexes);
		for (size_t i = 0; i < indexes.size(); i++) {
			int index = indexes[i];
			int idx = classIds[index];
			cv::rectangle(image, position_boxes[index], cv::Scalar(0, 0, 255), 2, 8);
			cv::rectangle(image, cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y - 20),
				cv::Point(position_boxes[index].br().x, position_boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);
			cv::putText(image, class_names[idx], cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
		}
		float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
		cv::putText(image, cv::format("FPS: %.2f", 1 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
		cv::imshow("C++ + Tensorrt + Yolo11 推理结果", image);
		cv::waitKey(0);
	}
	else
	{
		onnx_to_engine(model_path_onnx, model_path_engine, "fp16");
	}
	return 0;
}
Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐