YOLO12/YOLO11 Inference
[Code] YOLO11 inference.
Python-Export-Onnx
from ultralytics import YOLO
model = YOLO('yolo11n.pt')   # swap in 'yolo12n.pt' for YOLO12
# model = YOLO('yolo12n.pt')
model.train(data="coco128.yaml", epochs=4, batch=2, imgsz=640)   # optional quick fine-tune on COCO128
model.val(data="coco128.yaml", imgsz=640, batch=2, conf=0.25, iou=0.6)
model.predict("bus.jpg")
model.export(format="onnx")  # writes yolo11n.onnx next to the weights
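All of the decoders below assume the exported graph takes a 1x3x640x640 input named images and returns a 1x84x8400 output named output0 (4 box coordinates plus 80 COCO class scores per candidate). A quick sanity check, assuming the export above produced yolo11n.onnx:
import onnxruntime as ort
sess = ort.InferenceSession('yolo11n.onnx', providers=['CPUExecutionProvider'])
print([(i.name, i.shape) for i in sess.get_inputs()])   # e.g. [('images', [1, 3, 640, 640])]
print([(o.name, o.shape) for o in sess.get_outputs()])  # e.g. [('output0', [1, 84, 8400])]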
Python-Onnxruntime
import cv2
import numpy as np
import onnxruntime as ort
def readClassesNames(file_path):
    with open(file_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names
classes_names = 'coco.names'
classes = readClassesNames(classes_names)
image = cv2.imread('bus.jpg')
image_height, image_width = image.shape[:2]
model_path = 'yolo11n.onnx'
start_time = cv2.getTickCount()
session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
conf_threshold = 0.25
iou_threshold = 0.45
model_inputs = session.get_inputs()
input_names = [model_inputs[i].name for i in range(len(model_inputs))]
input_shape = model_inputs[0].shape
model_output = session.get_outputs()
output_names = [model_output[i].name for i in range(len(model_output))]
input_height, input_width = input_shape[2:]
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
resized = cv2.resize(image_rgb, (input_width, input_height))
input_image = resized / 255.0
input_image = input_image.transpose(2,0,1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
outputs = session.run(output_names, {input_names[0]: input_tensor})[0]
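# output0 is 1x84x8400; transposing gives one row per candidate: [cx, cy, w, h, 80 class scores]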
predictions = np.squeeze(outputs).T
scores = np.max(predictions[:, 4:], axis=1)
predictions = predictions[scores > conf_threshold, :]
scores = scores[scores > conf_threshold]
class_ids = np.argmax(predictions[:, 4:], axis=1)
boxes = predictions[:, :4]
input_shape = np.array([input_width, input_height, input_width, input_height])
boxes = np.divide(boxes, input_shape, dtype=np.float32)
boxes *= np.array([image_width, image_height, image_width, image_height])
boxes = boxes.astype(np.int32)
indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_threshold, nms_threshold=iou_threshold)
detections = []
def xywh2xyxy(x):
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y
for (bbox, score, label) in zip(xywh2xyxy(boxes[indices]), scores[indices], class_ids[indices]):
    bbox = bbox.round().astype(np.int32).tolist()
    cls_id = int(label)
    cls = classes[cls_id]
    cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), 2, 8)
    cv2.rectangle(image, (bbox[0], bbox[1] - 20), (bbox[2], bbox[1]), (0, 255, 255), -1)
    cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
                cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], thickness=2)
end_time = cv2.getTickCount()
t = (end_time - start_time)/cv2.getTickFrequency()
fps = 1/t
print(f"EStimated FPS: {fps:.2f}")
cv2.putText(image, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8);
cv2.imshow("YOLO11-ONNXRUNTIME", image)
cv2.waitKey(0)
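Note that start_time is taken before the InferenceSession is even created, so the reported "FPS" includes session construction and all pre/post-processing for a single frame; it understates steady-state throughput. A rough steady-state timing sketch, reusing the session and input_tensor from above:
t0 = cv2.getTickCount()
for _ in range(50):
    session.run(output_names, {input_names[0]: input_tensor})
t = (cv2.getTickCount() - t0) / cv2.getTickFrequency()
print(f"~{50 / t:.1f} inferences/s")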
Cpp-Onnxruntime
#include <fstream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
using namespace std;
using namespace cv;
using namespace Ort;
vector<string> readClassNames(const string& filename) {
    vector<string> classNames;
    ifstream file(filename);
    if (!file.is_open()) {
        cerr << "Error opening file: " << filename << endl;
        return classNames;
    }
    string line;
    while (getline(file, line)) {
        if (!line.empty()) {
            classNames.push_back(line);
        }
    }
    file.close();
    return classNames;
}
int main(int argc, char** argv)
{
    string filename = "coco.names";
    vector<string> labels = readClassNames(filename);
    Mat image = imread("bus.jpg");
    int ih = image.rows;
    int iw = image.cols;
    string onnxpath = "yolo11n.onnx";
    wstring modelPath = wstring(onnxpath.begin(), onnxpath.end());
    SessionOptions session_options;
    Env env = Env(ORT_LOGGING_LEVEL_ERROR, "yolo11n");
    session_options.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    Session session_(env, modelPath.c_str(), session_options);
    vector<string> input_node_names;
    vector<string> output_node_names;
    size_t numInputNodes = session_.GetInputCount();
    size_t numOutputNodes = session_.GetOutputCount();
    AllocatorWithDefaultOptions allocator;
    input_node_names.reserve(numInputNodes);
    int input_w = 0;
    int input_h = 0;
    for (int i = 0; i < numInputNodes; i++) {
        auto input_name = session_.GetInputNameAllocated(i, allocator);
        input_node_names.push_back(input_name.get());
        TypeInfo input_type_info = session_.GetInputTypeInfo(i);
        auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
        auto input_dims = input_tensor_info.GetShape();
        input_w = input_dims[3];
        input_h = input_dims[2];
        cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << endl;
    }
    int output_h = 0;
    int output_w = 0;
    TypeInfo output_type_info = session_.GetOutputTypeInfo(0);
    auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
    auto output_dims = output_tensor_info.GetShape();
    output_h = output_dims[1];
    output_w = output_dims[2];
    cout << "output format : HxW = " << output_dims[1] << "x" << output_dims[2] << endl;
    for (int i = 0; i < numOutputNodes; i++) {
        auto out_name = session_.GetOutputNameAllocated(i, allocator);
        output_node_names.push_back(out_name.get());
    }
    cout << "input: " << input_node_names[0] << " output: " << output_node_names[0] << endl;
    int64 start = getTickCount();
    // pad the input to a square canvas so one scale factor per axis maps boxes back to the original image
    int w = image.cols;
    int h = image.rows;
    int _max = max(h, w);
    Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
    Rect roi(0, 0, w, h);
    image.copyTo(image_(roi));
    float x_factor = image_.cols / static_cast<float>(input_w);
    float y_factor = image_.rows / static_cast<float>(input_h);
    Mat blob = dnn::blobFromImage(image_, 1 / 255.0, Size(input_w, input_h), Scalar(0, 0, 0), true, false);
    size_t tpixels = input_h * input_w * 3;
    array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };
    auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Value input_tensor_ = Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
    const array<const char*, 1> inputNames = { input_node_names[0].c_str() };
    const array<const char*, 1> outNames = { output_node_names[0].c_str() };
    vector<Value> ort_outputs;
    try {
        ort_outputs = session_.Run(RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
    }
    catch (const exception& e) {
        cout << e.what() << endl;
    }
    // note: session_ must stay alive while pdata is in use; RAII cleans everything up at the end of main
    const float* pdata = ort_outputs[0].GetTensorMutableData<float>();
    Mat dout(output_h, output_w, CV_32F, (float*)pdata);
    Mat det_output = dout.t();
    vector<Rect> boxes;
    vector<int> classIds;
    vector<float> confidences;
    for (int i = 0; i < det_output.rows; i++) {
        Mat classes_scores = det_output.row(i).colRange(4, 84);
        Point classIdPoint;
        double score;
        minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
        if (score > 0.25)
        {
            float cx = det_output.at<float>(i, 0);
            float cy = det_output.at<float>(i, 1);
            float ow = det_output.at<float>(i, 2);
            float oh = det_output.at<float>(i, 3);
            int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            int width = static_cast<int>(ow * x_factor);
            int height = static_cast<int>(oh * y_factor);
            Rect box;
            box.x = x;
            box.y = y;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(score);
        }
    }
    vector<int> indexes;
    dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
    for (size_t i = 0; i < indexes.size(); i++) {
        int index = indexes[i];
        int idx = classIds[index];
        rectangle(image, boxes[index], Scalar(0, 0, 255), 2, 8);
        rectangle(image, Point(boxes[index].tl().x, boxes[index].tl().y - 20),
            Point(boxes[index].br().x, boxes[index].tl().y), Scalar(0, 255, 255), -1);
        putText(image, labels[idx], Point(boxes[index].tl().x, boxes[index].tl().y), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    }
    float t = (getTickCount() - start) / static_cast<float>(getTickFrequency());
    putText(image, format("FPS: %.2f", 1 / t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    imshow("YOLO11-ONNXRUNTIME", image);
    waitKey(0);
    return 0;
}
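A portability note: the wstring(onnxpath.begin(), onnxpath.end()) conversion exists because Ort::Session takes a wide-character path on Windows (ORTCHAR_T is wchar_t there); on Linux/macOS it takes const char*, so you would pass onnxpath.c_str() directly. The character-by-character conversion is only safe for ASCII paths.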
Python-Openvino
import cv2
import numpy as np
from openvino.runtime import Core
def readClassesNames(file_path):
    with open(file_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names
classes_names = 'coco.names'
classes = readClassesNames(classes_names)
image = cv2.imread('bus.jpg')
image_height, image_width = image.shape[:2]
model_path = 'yolo11n.onnx'
start_time = cv2.getTickCount()
core = Core()
model = core.compile_model(model_path, 'CPU')
input_shape = model.inputs[0].shape
output_shape = model.outputs[0].shape
conf_threshold = 0.25
iou_threshold = 0.45
score_threshold = 0.25
input_height, input_width = input_shape[2:]
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
resized = cv2.resize(image_rgb, (input_width, input_height))
input_image = resized / 255.0
input_image = input_image.transpose(2,0,1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
outputs = model([input_tensor])[model.outputs[0]]
predictions = np.squeeze(outputs).T
scores = np.max(predictions[:, 4:], axis=1)
predictions = predictions[scores > score_threshold, :]
scores = scores[scores > score_threshold]
class_ids = np.argmax(predictions[:, 4:], axis=1)
boxes = predictions[:, :4]
input_shape = np.array([input_width, input_height, input_width, input_height])
boxes = np.divide(boxes, input_shape, dtype=np.float32)
boxes *= np.array([image_width, image_height, image_width, image_height])
boxes = boxes.astype(np.int32)
indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_threshold, nms_threshold=iou_threshold)
detections = []
def xywh2xyxy(x):
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y
for (bbox, score, label) in zip(xywh2xyxy(boxes[indices]), scores[indices], class_ids[indices]):
    bbox = bbox.round().astype(np.int32).tolist()
    cls_id = int(label)
    cls = classes[cls_id]
    cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), 2, 8)
    cv2.rectangle(image, (bbox[0], bbox[1] - 20), (bbox[2], bbox[1]), (0, 255, 255), -1)
    cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
                cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], thickness=2)
end_time = cv2.getTickCount()
t = (end_time - start_time) / cv2.getTickFrequency()
fps = 1 / t
print(f"EStimated FPS: {fps:.2f}")
cv2.putText(image, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8);
cv2.imshow("YOLO11-OPENVINO", image)
cv2.waitKey(0)
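compile_model can consume the ONNX file directly, as above. To skip the per-run conversion you can save an OpenVINO IR once; a minimal sketch, assuming a recent openvino package with the convert_model/save_model helpers:
import openvino as ov
ov_model = ov.convert_model('yolo11n.onnx')  # one-time ONNX -> IR conversion
ov.save_model(ov_model, 'yolo11n.xml')       # writes yolo11n.xml + yolo11n.bin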
Cpp-Openvino
#include <fstream>
#include <opencv2/opencv.hpp>
#include <openvino/openvino.hpp>
using namespace std;
using namespace cv;
using namespace ov;
vector<string> readClassNames(const string& filename) {
    vector<string> classNames;
    ifstream file(filename);
    if (!file.is_open()) {
        cerr << "Error opening file: " << filename << endl;
        return classNames;
    }
    string line;
    while (getline(file, line)) {
        if (!line.empty()) {
            classNames.push_back(line);
        }
    }
    file.close();
    return classNames;
}
int main(int argc, char** argv)
{
    string filename = "coco.names";
    vector<string> labels = readClassNames(filename);
    Mat image = imread("bus.jpg");
    int ih = image.rows;
    int iw = image.cols;
    string model_path = "yolo11n.onnx";
    float conf_threshold = 0.25;
    float nms_threshold = 0.4;
    float score_threshold = 0.25;
    Core core;
    auto compiled_model = core.compile_model(model_path, "CPU");
    auto input_port = compiled_model.input();
    auto input_shape = input_port.get_shape();
    InferRequest infer_request = compiled_model.create_infer_request();
    int64 start = getTickCount();
    int w = image.cols;
    int h = image.rows;
    int _max = max(h, w);
    Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
    Rect roi(0, 0, w, h);
    image.copyTo(image_(roi));
    int input_h = input_shape[2];   // NCHW: dim 2 is height, dim 3 is width
    int input_w = input_shape[3];
    float x_factor = image_.cols / static_cast<float>(input_w);
    float y_factor = image_.rows / static_cast<float>(input_h);
    // build the blob from the padded square image_, matching the scale factors above
    Mat blob = dnn::blobFromImage(image_, 1.0 / 255.0, Size(input_w, input_h), Scalar(), true);
    Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), blob.ptr(0));
    infer_request.set_input_tensor(input_tensor);
    infer_request.infer();
    auto output = infer_request.get_output_tensor(0);
    auto output_shape = output.get_shape();
    float* data = output.data<float>();
    Mat output_buffer(output_shape[1], output_shape[2], CV_32F, data);
    transpose(output_buffer, output_buffer);
    vector<Rect> boxes;
    vector<int> classIds;
    vector<float> confidences;
    for (int i = 0; i < output_buffer.rows; i++) {
        Mat classes_scores = output_buffer.row(i).colRange(4, 84);
        Point classIdPoint;
        double score;
        minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
        if (score > score_threshold)
        {
            float cx = output_buffer.at<float>(i, 0);
            float cy = output_buffer.at<float>(i, 1);
            float ow = output_buffer.at<float>(i, 2);
            float oh = output_buffer.at<float>(i, 3);
            int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            int width = static_cast<int>(ow * x_factor);
            int height = static_cast<int>(oh * y_factor);
            Rect box;
            box.x = x;
            box.y = y;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(score);
        }
    }
    vector<int> indexes;
    dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indexes);
    for (size_t i = 0; i < indexes.size(); i++) {
        int index = indexes[i];
        int idx = classIds[index];
        rectangle(image, boxes[index], Scalar(0, 0, 255), 2, 8);
        rectangle(image, Point(boxes[index].tl().x, boxes[index].tl().y - 20),
            Point(boxes[index].br().x, boxes[index].tl().y), Scalar(0, 255, 255), -1);
        putText(image, labels[idx], Point(boxes[index].tl().x, boxes[index].tl().y), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    }
    float t = (getTickCount() - start) / static_cast<float>(getTickFrequency());
    putText(image, format("FPS: %.2f", 1 / t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    imshow("YOLO11-OPENVINO", image);
    waitKey(0);
    return 0;
}
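A small note on devices: swapping the "CPU" argument of compile_model for "GPU" or "AUTO" targets an Intel GPU or lets OpenVINO pick a device, with no other code changes.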
Python-Tensorrt
import os
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import cv2
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem
    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
    def __repr__(self):
        return self.__str__()
def onnx_to_engine(onnx_file_path, engine_file_path, precision_type=None):
    # builder/parse/serialize flow written against the TensorRT 8.x Python API
    # (max_workspace_size and build_engine are deprecated there and removed in newer releases)
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, TRT_LOGGER)
    with open(onnx_file_path, 'rb') as model:
        if not parser.parse(model.read()):
            print('ERROR: Failed to parse the ONNX file.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 30
    if precision_type == 'fp16':
        config.set_flag(trt.BuilderFlag.FP16)
    else:
        print('WARNING: FP32 is used by default.')
    profile = builder.create_optimization_profile()
    config.add_optimization_profile(profile)
    engine = builder.build_engine(network, config)
    with open(engine_file_path, 'wb') as f:
        f.write(engine.serialize())
def readClassesNames(file_path):
    with open(file_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names
conf_threshold = 0.25
iou_threshold = 0.45
score_threshold = 0.25
classes_names = 'coco.names'
onnx_path = 'yolo11n.onnx'
engine_path = 'yolo11n.engine'
classes = readClassesNames(classes_names)
image = cv2.imread("bus.jpg")
image_height, image_width = image.shape[:2]
TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
if os.path.exists(engine_path):
    with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    input_shape = engine.get_binding_shape(engine.get_binding_index('images'))
    input_height, input_width = input_shape[2:]
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(image_rgb, (input_width, input_height))
    input_image = resized / 255.0
    input_image = input_image.transpose(2, 0, 1)
    input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
    start_time = cv2.getTickCount()
    inputs_alloc_buf = []
    outputs_alloc_buf = []
    bindings_alloc_buf = []
    stream_alloc_buf = cuda.Stream()
    context = engine.create_execution_context()
    data_type = []
    for binding in engine:
        if engine.binding_is_input(binding):
            size = input_tensor.shape[0] * input_tensor.shape[1] * input_tensor.shape[2] * input_tensor.shape[3]
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            data_type.append(dtype)
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings_alloc_buf.append(int(device_mem))
            inputs_alloc_buf.append(HostDeviceMem(host_mem, device_mem))
        else:
            size = trt.volume(engine.get_binding_shape(binding)[1:]) * engine.max_batch_size
            host_mem = cuda.pagelocked_empty(size, data_type[0])
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings_alloc_buf.append(int(device_mem))
            outputs_alloc_buf.append(HostDeviceMem(host_mem, device_mem))
    inputs_alloc_buf[0].host = input_tensor.reshape(-1)
    for inp in inputs_alloc_buf:
        cuda.memcpy_htod_async(inp.device, inp.host, stream_alloc_buf)
    context.set_binding_shape(0, input_tensor.shape)
    # explicit-batch engines must use execute_async_v2 (execute_async is for implicit batch)
    context.execute_async_v2(bindings=bindings_alloc_buf, stream_handle=stream_alloc_buf.handle)
    for out in outputs_alloc_buf:
        cuda.memcpy_dtoh_async(out.host, out.device, stream_alloc_buf)
    stream_alloc_buf.synchronize()
    net_output = [out.host for out in outputs_alloc_buf]
    outputs = net_output[0].reshape(84, 8400)
    predictions = np.squeeze(outputs).T
    print(predictions.shape)
    scores = np.max(predictions[:, 4:], axis=1)
    predictions = predictions[scores > score_threshold, :]
    scores = scores[scores > score_threshold]
    class_ids = np.argmax(predictions[:, 4:], axis=1)
    boxes = predictions[:, :4]
    input_shape = np.array([input_width, input_height, input_width, input_height])
    boxes = np.divide(boxes, input_shape, dtype=np.float32)
    boxes *= np.array([image_width, image_height, image_width, image_height])
    boxes = boxes.astype(np.int32)
    indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_threshold, nms_threshold=iou_threshold)
    detections = []
    def xywh2xyxy(x):
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y
    for (bbox, score, label) in zip(xywh2xyxy(boxes[indices]), scores[indices], class_ids[indices]):
        bbox = bbox.round().astype(np.int32).tolist()
        cls_id = int(label)
        cls = classes[cls_id]
        cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), 2, 8)
        cv2.rectangle(image, (bbox[0], bbox[1] - 20), (bbox[2], bbox[1]), (0, 255, 255), -1)
        cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
                    cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], thickness=2)
    end_time = cv2.getTickCount()
    t = (end_time - start_time) / cv2.getTickFrequency()
    fps = 1 / t
    print(f"Estimated FPS: {fps:.2f}")
    cv2.putText(image, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8)
    cv2.imshow("Python + TensorRT + YOLO11 Inference Result", image)
    cv2.waitKey(0)
else:
    onnx_to_engine(onnx_path, engine_path, 'fp16')
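Note that when yolo11n.engine does not exist yet, this script only builds and saves the engine, then exits; run it a second time for inference. If the TensorRT tools are installed, an equivalent engine can also be built once from the command line (assuming trtexec is on PATH): trtexec --onnx=yolo11n.onnx --saveEngine=yolo11n.engine --fp16. Engines are specific to the GPU and TensorRT version they were built with.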
Cpp-Tensorrt
#include <windows.h>
#include <fstream>
#include <iostream>
#include <vector>
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <opencv2/opencv.hpp>
std::vector<std::string> read_class_names(std::string path_name)
{
    std::vector<std::string> class_names;
    std::ifstream infile;
    infile.open(path_name.data());
    assert(infile.is_open());
    std::string str;
    while (getline(infile, str)) {
        class_names.push_back(str);
        str.clear();
    }
    infile.close();
    return class_names;
}
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* message) noexcept
    {
        if (severity != Severity::kINFO)
            std::cout << message << std::endl;
    }
} gLogger;
void onnx_to_engine(std::string onnx_file_path, std::string engine_file_path, std::string type) {
    // builder/parse/serialize flow for the TensorRT 8.x C++ API
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
    parser->parseFromFile(onnx_file_path.c_str(), 2);
    for (int i = 0; i < parser->getNbErrors(); ++i)
    {
        std::cout << "load error: " << parser->getError(i)->desc() << std::endl;
    }
    printf("TensorRT loaded the ONNX model successfully!\n");
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 30);
    if (type == "fp16") {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    else {
        std::cout << "WARNING: FP32 is used by default." << std::endl;
    }
    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "trying to save the engine file now..." << std::endl;
    std::ofstream file_ptr(engine_file_path, std::ios::binary);
    if (!file_ptr) {
        std::cerr << "could not open plan output file" << std::endl;
        return;
    }
    nvinfer1::IHostMemory* model_stream = engine->serialize();
    file_ptr.write(reinterpret_cast<const char*>(model_stream->data()), model_stream->size());
    model_stream->destroy();
    engine->destroy();
    network->destroy();
    parser->destroy();
    std::cout << "converted the ONNX model to a TensorRT engine successfully!" << std::endl;
}
int main()
{
    const char* model_path_onnx = "yolo11n.onnx";
    const char* model_path_engine = "yolo11n.engine";
    const char* image_path = "bus.jpg";
    std::string label_path = "coco.names";
    const char* input_node_name = "images";
    const char* output_node_name = "output0";
    int num_ionode = 2;
    std::vector<std::string> class_names;
    float factor;
    std::ifstream f(model_path_engine);
    bool engine_file_exist = f.good();
    std::ifstream file_ptr(model_path_engine, std::ios::binary);
    if (engine_file_exist) {
        size_t size = 0;
        file_ptr.seekg(0, file_ptr.end);
        size = file_ptr.tellg();
        file_ptr.seekg(0, file_ptr.beg);
        char* model_stream = new char[size];
        file_ptr.read(model_stream, size);
        file_ptr.close();
        Logger logger;
        nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
        nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size);
        nvinfer1::IExecutionContext* context = engine->createExecutionContext();
        delete[] model_stream;
        void** data_buffer = new void* [num_ionode];
        int input_node_index = engine->getBindingIndex(input_node_name);
        nvinfer1::Dims input_node_dim = engine->getBindingDimensions(input_node_index);
        size_t input_data_length = input_node_dim.d[1] * input_node_dim.d[2] * input_node_dim.d[3];
        cudaMalloc(&(data_buffer[input_node_index]), input_data_length * sizeof(float));
        int output_node_index = engine->getBindingIndex(output_node_name);
        nvinfer1::Dims output_node_dim = engine->getBindingDimensions(output_node_index);
        size_t output_data_length = output_node_dim.d[1] * output_node_dim.d[2];
        cudaMalloc(&(data_buffer[output_node_index]), output_data_length * sizeof(float));
        cv::Mat image = cv::imread(image_path);
        int max_side_length = std::max(image.cols, image.rows);
        cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3);
        cv::Rect roi(0, 0, image.cols, image.rows);
        image.copyTo(max_image(roi));
        cv::Size input_node_shape(input_node_dim.d[2], input_node_dim.d[3]);
        int64 start = cv::getTickCount();
        cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false);
        cudaStream_t stream;
        cudaStreamCreate(&stream);
        std::vector<float> input_data(input_data_length);
        memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float));
        cudaMemcpyAsync(data_buffer[input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream);
        context->enqueueV2(data_buffer, stream, nullptr);
        float* result_array = new float[output_data_length];
        cudaMemcpyAsync(result_array, data_buffer[output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream);
        cudaStreamSynchronize(stream);  // wait for the async copy to finish before reading result_array
        factor = max_side_length / (float)input_node_dim.d[2];
        class_names = read_class_names(label_path);
        cv::Mat det_output = cv::Mat(output_node_dim.d[1], output_node_dim.d[2], CV_32F, result_array);
        transpose(det_output, det_output);
        std::vector<cv::Rect> position_boxes;
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::cout << det_output.rows << std::endl;
        for (int i = 0; i < det_output.rows; i++) {
            cv::Mat classes_scores = det_output.row(i).colRange(4, 84);
            cv::Point classIdPoint;
            double score;
            minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
            if (score > 0.25)
            {
                float cx = det_output.at<float>(i, 0);
                float cy = det_output.at<float>(i, 1);
                float ow = det_output.at<float>(i, 2);
                float oh = det_output.at<float>(i, 3);
                int x = static_cast<int>((cx - 0.5 * ow) * factor);
                int y = static_cast<int>((cy - 0.5 * oh) * factor);
                int width = static_cast<int>(ow * factor);
                int height = static_cast<int>(oh * factor);
                cv::Rect box;
                box.x = x;
                box.y = y;
                box.width = width;
                box.height = height;
                position_boxes.push_back(box);
                classIds.push_back(classIdPoint.x);
                confidences.push_back(score);
            }
        }
        std::vector<int> indexes;
        cv::dnn::NMSBoxes(position_boxes, confidences, 0.25, 0.45, indexes);
        for (size_t i = 0; i < indexes.size(); i++) {
            int index = indexes[i];
            int idx = classIds[index];
            cv::rectangle(image, position_boxes[index], cv::Scalar(0, 0, 255), 2, 8);
            cv::rectangle(image, cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y - 20),
                cv::Point(position_boxes[index].br().x, position_boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);
            cv::putText(image, class_names[idx], cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
        }
        float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
        cv::putText(image, cv::format("FPS: %.2f", 1 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
        cv::imshow("C++ + TensorRT + YOLO11 Inference Result", image);
        cv::waitKey(0);
    }
    else
    {
        onnx_to_engine(model_path_onnx, model_path_engine, "fp16");
    }
    return 0;
}
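As with the Python version, the first run of this program only serializes yolo11n.engine and exits; run it again to perform inference with the cached engine.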