RK3566/RK3588 yolov5 deployment (Part 1): Flashing the Ubuntu image and installing CMake
RK3566/RK3588 yolov5 deployment (Part 2): NPU smoke test and using the rknn_api
RK3566/RK3588 yolov5 deployment (Part 4): Multi-threaded yolov5 deployment
The code supports both RK3588 and RK3566; make sure to pick the matching rknn model at runtime:
Shared via Baidu Netdisk: YoloV5_first_test
Link: https://pan.baidu.com/s/1ZSuBvndMpP6xuhZsuVX8ow?pwd=4xh9  Extraction code: 4xh9
1. Building and running the code
After extracting the archive, enter the project directory and run the following where CMakeLists.txt is located:
cmake -S . -B build
cmake --build build
When the build finishes, enter the build folder and run:
./yolov5_video ../weight/rk3566/yolov5s.rknn ../video/test.mp4
Note that the command above is for the RK3566 platform; on RK3588 use the following instead:
./yolov5_video ../weight/rk3588/yolov5s.rknn ../video/test.mp4
This runs the yolov5s model on test.mp4 under the video directory and saves the result to output.avi in the build folder.
During inference the terminal prints the real-time FPS.
On the RK3566 platform, yolov5s running in a single thread reaches roughly 13 FPS; in our tests yolov5n reaches about 25 FPS.
Results:
Open a new terminal and run htop to watch the current CPU usage.
Open another terminal and run sudo watch -n 1 cat /sys/kernel/debug/rknpu/load to watch the NPU usage.
Even on RK3566, neither the NPU nor the CPU is saturated when yolov5s runs in a single thread, so the frame rate will improve further once a thread pool is introduced for parallel inference later in this series (RK3588 has 3 NPU cores, so the gain from multi-threading is much larger there).
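If you would rather sample the NPU load from your own code instead of a second terminal, the sketch below simply reads the same debugfs node once per second. It is only an illustrative helper, assuming the node exists and that the process has permission to read it (e.g. runs as root):

// npu_load_monitor.cpp - illustrative helper: print /sys/kernel/debug/rknpu/load once per second.
#include <chrono>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>

int main()
{
    while (true)
    {
        // The rknpu driver exposes its current load as plain text in debugfs; echo whatever it reports.
        std::ifstream load_file("/sys/kernel/debug/rknpu/load");
        if (!load_file.is_open())
        {
            std::cerr << "cannot open /sys/kernel/debug/rknpu/load (try running as root)" << std::endl;
            return -1;
        }
        std::string line;
        while (std::getline(load_file, line))
        {
            std::cout << line << std::endl;
        }
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    return 0;
}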
The main.cpp code is attached below (the post-processing part uses Rockchip's official code):
// Include OpenCV, the RKNN runtime API and the other headers we need
#include <opencv2/opencv.hpp>
#include <opencv2/videoio.hpp>
#include "rknn_api.h" //rknn_api.h内部已经包含extern "C"
#include "../include/postprocess.h"
#include <stdint.h>
#include <string.h> // memset
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include <vector>
using namespace std;
using namespace cv;
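// Read the .rknn model file into memory; returns a malloc'd buffer (caller must free) and its size via model_len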
unsigned char* load_model(const char* model_path, int* model_len)
{
FILE *fp = fopen(model_path,"rb");
if(fp == nullptr)
{
cerr << "open model faile!" << endl;
return NULL;
}
fseek(fp,0,SEEK_END);
int model_size = ftell(fp);
unsigned char *model = (unsigned char *)malloc(model_size);
fseek(fp,0,SEEK_SET);
if(model_size != (int)fread(model,1,model_size,fp))
{
cerr << "read model failed!" << endl;
free(model);
fclose(fp);
return NULL;
}
*model_len = model_size;
if(fp)
{
fclose(fp);
}
return model;
}
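// Print the attributes of a model tensor (shape, layout, data type, quantization parameters)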
void dump_tensor_attr_info(rknn_tensor_attr *attr)
{
cout << "index= " << attr->index << endl;
cout << "name= " << attr->name << endl;
cout << "n_dims= " << attr->n_dims << endl;
cout << "dims= " ;
for(int i = 0; i < attr->n_dims; i++)
{
cout << attr->dims[i] << " ";
}
cout << endl;
cout << "n_elems= " << attr->n_elems << endl;
cout << "size= " << attr->size << endl;
cout << "fmt= " << get_format_string(attr->fmt) << endl;
cout << "type= " << get_type_string(attr->type) << endl;
cout << "qnt_type= " << get_qnt_type_string(attr->qnt_type) << endl;
cout << "zp= " << attr->zp << endl;
cout << "scale= " << attr->scale << endl;
}
int main(int argc, char* argv[])
{
int img_width = 0;
int img_height = 0;
int channel = 3;
int width = 0;
int height = 0;
const float nms_threshold = NMS_THRESH;
const float box_conf_threshold = BOX_THRESH;
Mat orig_img;
Mat img;
Mat resized_img;
VideoCapture capture;
rknn_context ctx = 0;
int model_len = 0;
unsigned char *model = 0;
float f;
float FPS[16];
int i, Fcnt=0;
std::chrono::steady_clock::time_point Tbegin, Tend;
for(int i = 0; i < 16; i++) FPS[i] = 0.0;
if (argc < 2)
{
cerr <<"Usage:"<< argv[0] << "<rknn model> <video_path>" << endl;
return -1;
}
const char *model_path = argv[1];
char *video_path = nullptr;
if(argc > 2)
{
video_path = argv[2];
}
cout << "load model ..." << endl;
model = load_model(model_path, &model_len);
int ret = rknn_init(&ctx, model, model_len,0,NULL);
if(ret < 0)
{
cerr << "rknn init fail!" << endl;
return -1;
}
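// Query how many input and output tensors the model has, then dump their attributes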
rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if(ret != RKNN_SUCC)
{
cerr << "rknn query num fail!" << endl;
return -1;
}
cout << "model input num: " << io_num.n_input << " ,output num: " << io_num.n_output << endl;
cout << "model input attr: " << endl;
rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for(int i = 0; i < io_num.n_input; i++)
{
input_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
if(ret != RKNN_SUCC)
{
cerr << "rknn query input_attr fail!" << endl;
return -1;
}
dump_tensor_attr_info(&(input_attrs[i]));
}
cout << "model output attr: " << endl;
rknn_tensor_attr output_attrs[io_num.n_output];
memset(output_attrs, 0, sizeof(output_attrs));
for(int i = 0; i < io_num.n_output; i++)
{
output_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
if(ret != RKNN_SUCC)
{
cerr << "rknn query output_attr fail!" << endl;
return -1;
}
dump_tensor_attr_info(&(output_attrs[i]));
}
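// Derive the model input shape; the order of dims depends on the tensor layout (NCHW vs NHWC)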
if(input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
cout << "model input fmt RKNN_TENSOR_NCHW" << endl;
channel = input_attrs[0].dims[1];
height = input_attrs[0].dims[2];
width = input_attrs[0].dims[3];
}
if(input_attrs[0].fmt == RKNN_TENSOR_NHWC)
{
cout << "model input fmt RKNN_TENSOR_NHWC" << endl;
height = input_attrs[0].dims[1];
width = input_attrs[0].dims[2];
channel = input_attrs[0].dims[3];
}
cout << "input image height: " << height << " width: " << width << " channels: " << channel << endl;
rknn_input inputs[io_num.n_input];
memset(inputs, 0, sizeof(inputs));
for(int i = 0; i < io_num.n_input; i++)
{
inputs[i].index = i;
inputs[i].type = RKNN_TENSOR_UINT8;
inputs[i].size = height * width * channel;
inputs[i].fmt = input_attrs[i].fmt;
inputs[i].pass_through = 0;
}
int set_fps = 10;
if (video_path == nullptr)
{
if (!capture.open(0))
{
cerr << "capture.open(0) failed" << endl;
return -1;
}
}
else
{
if (!capture.open(video_path))
{
cerr << "capture.open( " << video_path << " ) failed" << endl;
return -1;
}
set_fps = static_cast<int>(capture.get(CAP_PROP_FPS));
}
Size size = Size(capture.get(CAP_PROP_FRAME_WIDTH), capture.get(CAP_PROP_FRAME_HEIGHT));
VideoWriter writer;
writer.open("./output.avi", VideoWriter::fourcc('M', 'J', 'P', 'G'), set_fps, size, true);
cout << "press ctrl c to terminated..." << endl;
while(capture.read(orig_img))
{
if(orig_img.empty())
{
cerr << "capture.read error!" << endl;
break;
}
Tbegin = chrono::steady_clock::now();
cvtColor(orig_img, img, COLOR_BGR2RGB);
img_width = img.cols;
img_height = img.rows;
if (img_width != width || img_height != height)
{
resize(img,resized_img,Size(width,height));
inputs[0].buf = (void*)resized_img.data;
}
else
{
inputs[0].buf = (void*)img.data;
}
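// Feed the frame to the NPU, run inference, and fetch the raw (quantized INT8) outputs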
ret = rknn_inputs_set(ctx, io_num.n_input, inputs);
rknn_output outputs[io_num.n_output];
memset(outputs, 0, sizeof(outputs));
for(int i = 0; i < io_num.n_output; i++)
{
outputs[i].index = i;
outputs[i].want_float = 0;
}
ret = rknn_run(ctx, NULL);
ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
float scale_w = (float)width / img_width;
float scale_h = (float)height / img_height;
detect_result_group_t detect_result_group;
vector<float> out_scales;
vector<int32_t> out_zps;
for(int i = 0; i < io_num.n_output; i++)
{
out_scales.push_back(output_attrs[i].scale);
out_zps.push_back(output_attrs[i].zp);
}
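// Decode the three YOLO output heads: dequantize with zp/scale, filter by confidence, then apply NMS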
post_process((int8_t *)outputs[0].buf, (int8_t *)outputs[1].buf, (int8_t *)outputs[2].buf,
height, width,box_conf_threshold, nms_threshold, scale_w, scale_h,
out_zps, out_scales, &detect_result_group);
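// Draw each detection box and its class label on the original frame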
char text[256];
for (int i = 0; i < detect_result_group.count; i++)
{
detect_result_t* det_result = &(detect_result_group.results[i]);
int x1 = det_result->box.left;
int y1 = det_result->box.top;
int x2 = det_result->box.right;
int y2 = det_result->box.bottom;
rectangle(orig_img, Point(x1, y1), Point(x2, y2), Scalar(255, 0, 0),10);
sprintf(text, "%s %.1f%%", det_result->name, det_result->prop * 100);
int baseLine = 0;
Size label_size = getTextSize(text, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
int x = det_result->box.left;
int y = det_result->box.top - label_size.height - baseLine;
if (y < 0) y = 0;
if (x + label_size.width > orig_img.cols) x = orig_img.cols - label_size.width;
rectangle(orig_img, Rect(Point(x, y), Size(label_size.width, label_size.height + baseLine)), Scalar(255, 255, 255), -1);
putText(orig_img, text, Point(x, y + label_size.height), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
}
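// Release the outputs for this frame and update a rolling FPS average over the last 16 frames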
Tend = chrono::steady_clock::now();
ret = rknn_outputs_release(ctx, io_num.n_output, outputs);
f = chrono::duration_cast <chrono::milliseconds> (Tend - Tbegin).count();
if(f > 0.0) FPS[((Fcnt++)&0x0F)]=1000.0/f;
for(f=0.0, i=0;i<16;i++){ f+=FPS[i]; }
putText(orig_img, format("FPS %0.2f", f/16),Point(10,20),FONT_HERSHEY_SIMPLEX,0.6, Scalar(0, 0, 255));
cout << "FPS" << f/16 << endl;
writer.write(orig_img);
}
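// Clean up: destroy the rknn context and free the model buffer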
if (ctx > 0)
{
rknn_destroy(ctx);
}
if(model)
{
free(model);
}
return 0;
}
CMakeLists.txt:
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
project(rk3566-demo VERSION 0.0.1 LANGUAGES CXX)
message(STATUS "System: ${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_VERSION}")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(LIB_ARCH "aarch64")
set(RKNN_API_PATH ${CMAKE_CURRENT_SOURCE_DIR}/librknn_api)
set(RKNN_API_INCLUDE_PATH ${RKNN_API_PATH}/include)
set(RKNN_API_LIB_PATH ${RKNN_API_PATH}/${LIB_ARCH}/librknnrt.so)
set(3RDPARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty)
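# Use the prebuilt aarch64 OpenCV shipped in 3rdparty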
set(OpenCV_DIR ${3RDPARTY_PATH}/opencv/opencv-linux-${LIB_ARCH}/lib)
find_package(OpenCV REQUIRED)
message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
include_directories(
${OpenCV_INCLUDE_DIRS}
${RKNN_API_INCLUDE_PATH}
${CMAKE_CURRENT_SOURCE_DIR}/include
)
add_library(nn_process SHARED
src/postprocess.cpp
)
add_executable(yolov5_video src/main.cpp)
target_link_libraries(yolov5_video
nn_process
${RKNN_API_LIB_PATH}
${OpenCV_LIBS}
)