准备工作:
1、点云数据标注:https://blog.csdn.net/m0_64293675/article/details/144189633?spm=1001.2014.3001.5502
2、数据集制作:https://blog.csdn.net/m0_64293675/article/details/144188916?spm=1001.2014.3001.5502
3、模型训练:https://blog.csdn.net/m0_64293675/article/details/144294201?spm=1001.2014.3001.5502
4、模型转换:https://blog.csdn.net/m0_64293675/article/details/146528600?spm=1001.2014.3001.5502
TensorRT部署将会用到2个模型文件:3d稀疏卷积网络onnx模型、neck+head 网络的.plan模型。
目录
- 一、拉取源码
- 二、基于数据集和模型去适配代码
- Lidar_AI_Solution/CUDA-CenterPoint/main.cpp
- Lidar_AI_Solution/CUDA-CenterPoint/include/common.h
- Lidar_AI_Solution/CUDA-CenterPoint/include/postprocess.h
- Lidar_AI_Solution/CUDA-CenterPoint/include/centerpoint.h
- Lidar_AI_Solution/CUDA-CenterPoint/src/centerpoint.cpp
- Lidar_AI_Solution/CUDA-CenterPoint/src/postprocess_kernels.cu
- Lidar_AI_Solution/CUDA-CenterPoint/CMakeLists.txt
- 关于libspconv库
- 三、编译运行
一、拉取源码
git clone https://github.com/NVIDIA-AI-IOT/Lidar_AI_Solution.git
得到如下目录结构,只需要关注CUDA-CenterPoint中的文件就行
二、基于数据集和模型去适配代码
由于这个工程是基于nuScenes数据集的CenterPoint目标检测,输出结果里有速度值,而自己的数据集和模型不输出速度,所以代码中有关目标速度的部分都要做修改,相应数据的维度也要修改,如下:
Lidar_AI_Solution/CUDA-CenterPoint/main.cpp
1、Model_File的路径修改成自己的.plan文件的路径
2、SaveBoxPred函数中,注释掉 ofs << box.vx << " ";
和 ofs << box.vy << " "; 这两行
Lidar_AI_Solution/CUDA-CenterPoint/include/common.h
1、DET_CHANNEL改成9(原来是11,去掉vx、vy两个速度通道),NUM_TASKS改成1(本文的模型只有一个检测头)
2、Params类中,各种参数要根据实际的训练参数去修改。需要注意的是,feature_num要改成传入点云的数据维度,比如读取到的点云数据是xyzi,feature_num就要改成4,如果是xyz,feature_num就要改成3
Lidar_AI_Solution/CUDA-CenterPoint/include/postprocess.h
1、Bndbox类
// Constructs a detection box by copying each argument into the member of the same name.
// This variant takes no velocity arguments (the vx / vy fields are dropped in this adaptation).
// Argument order matches the 9-channel detection record used by the caller:
// x, y, z, w, l, h, rt (heading), id (class label), score.
// NOTE(review): the units and the w/l axis convention are not visible here — confirm against training config.
Bndbox(float x_, float y_, float z_, float w_, float l_, float h_, float rt_, int id_, float score_)
: x(x_), y(y_), z(z_), w(w_), l(l_), h(h_),rt(rt_), id(id_), score(score_) {}
Lidar_AI_Solution/CUDA-CenterPoint/include/centerpoint.h
1、新增一行typedef struct float9 { float val[9]; } float9;
2、将std::vector<float11> detections_;
改成std::vector<float9> detections_;
Lidar_AI_Solution/CUDA-CenterPoint/src/centerpoint.cpp
1、在构造函数中修改3d稀疏卷积网络onnx的路径
2、在构造函数中修改detections_.resize
detections_.resize(MAX_DET_NUM, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
3、在构造函数中注释速度相关的部分
4、在析构函数中注释速度相关的部分
5、doInfer函数中,把scn_engine_->forward的输入特征维度由5改成4(与feature_num保持一致),并把trt_->forward的输出改成只有一个任务且去掉d_vel_,如下:
// auto result = scn_engine_->forward(
// {valid_num, 5}, spconv::DType::Float16, d_voxel_features,
// {valid_num, 4}, spconv::DType::Int32, d_voxel_indices,
// 1, sparse_shape, stream
// );
auto result = scn_engine_->forward(
{valid_num, 4}, spconv::DType::Float16, d_voxel_features,
{valid_num, 4}, spconv::DType::Int32, d_voxel_indices,
1, sparse_shape, stream
);
timing_scn_engine_.push_back(timer_.stop("3D Backbone", verbose_));
timer_.start(stream);
// trt_->forward({result->features_data(), d_reg_[0], d_height_[0], d_dim_[0], d_rot_[0], d_vel_[0], d_hm_[0],
// d_reg_[1], d_height_[1], d_dim_[1], d_rot_[1], d_vel_[1], d_hm_[1],
// d_reg_[2], d_height_[2], d_dim_[2], d_rot_[2], d_vel_[2], d_hm_[2],
// d_reg_[3], d_height_[3], d_dim_[3], d_rot_[3], d_vel_[3], d_hm_[3],
// d_reg_[4], d_height_[4], d_dim_[4], d_rot_[4], d_vel_[4], d_hm_[4],
// d_reg_[5], d_height_[5], d_dim_[5], d_rot_[5], d_vel_[5], d_hm_[5]}, stream);
trt_->forward({result->features_data(), d_reg_[0], d_height_[0], d_dim_[0], d_rot_[0], d_hm_[0]},stream);
6、在doPostDecodeCuda之前,新增速度相关的赋值
d_vel_[i_task] = nullptr;
vel_c_ = 0;
7、将float11改为float9,并修改相应的数组下标(去掉vx、vy后,rt的下标由8改为6,label的下标由9改为7,score的下标由10改为8)
std::sort(detections_.begin(), detections_.end(),
[](float9 boxes1, float9 boxes2) { return boxes1.val[8] > boxes2.val[8]; });
nms_pred_.push_back(Bndbox(detections_[i_nms].val[0], detections_[i_nms].val[1], detections_[i_nms].val[2],
detections_[i_nms].val[3], detections_[i_nms].val[4], detections_[i_nms].val[5],
detections_[i_nms].val[6],
params_.task_num_stride[i_task] + static_cast<int>(detections_[i_nms].val[7]), detections_[i_nms].val[8]));
Lidar_AI_Solution/CUDA-CenterPoint/src/postprocess_kernels.cu
1、核函数predictKernel,注释速度相关的部分,修改rs的计算(交换了atan2两个参数所取的rot通道,需要与自己模型rot输出的通道顺序保持一致)
float3 dim_;
dim_.x = exp(__half2float(dim[n * C_dim * HW + 0 * HW + h * W + w]));
dim_.y = exp(__half2float(dim[n * C_dim * HW + 1 * HW + h * W + w]));
dim_.z = exp(__half2float(dim[n * C_dim * HW + 2 * HW + h * W + w]));
// auto vx = __half2float(vel[n * C_vel * HW + 0 * HW + h * W + w]);
// auto vy = __half2float(vel[n * C_vel * HW + 1 * HW + h * W + w]);
// auto rs = atan2(__half2float(rot[n * C_rot * HW + h * W + w]), __half2float(rot[n * C_rot * HW + HW + h * W + w]));
auto rs = atan2(__half2float(rot[n * C_rot * HW + HW + h * W + w]), __half2float(rot[n * C_rot * HW + 0*HW + h * W + w]));
*(float3 *)(&detections[n * MAX_DET_NUM * DET_CHANNEL + DET_CHANNEL * curDet + 0]) = make_float3(xs, ys, zs);
*(float3 *)(&detections[n * MAX_DET_NUM * DET_CHANNEL + DET_CHANNEL * curDet + 3]) = dim_;
// detections[n * MAX_DET_NUM * DET_CHANNEL + DET_CHANNEL * curDet + 6] = vx;
// detections[n * MAX_DET_NUM * DET_CHANNEL + DET_CHANNEL * curDet + 7] = vy;
// *(float3 *)(&detections[n * MAX_DET_NUM * DET_CHANNEL + DET_CHANNEL * curDet + 8]) = make_float3(rs, label, score);
*(float3 *)(&detections[n * MAX_DET_NUM * DET_CHANNEL + DET_CHANNEL * curDet + 6]) = make_float3(rs, label, score);
2、核函数permute_cuda
// Copies each sorted detection into permute_boxes_sorted, one box per thread.
// Both buffers are indexed with a stride of DET_CHANNEL floats per box.
// Only threadIdx.x selects the box, and threads at or beyond boxes_num do nothing.
// Output channels 0..6 are written; channels past 6 are left untouched.
__global__ void permute_cuda(const float *boxes_sorted,
                             float *permute_boxes_sorted, int boxes_num) {
    const int box = threadIdx.x;
    if (box >= boxes_num) {
        return;
    }

    const float *src = boxes_sorted + box * DET_CHANNEL;
    float *dst = permute_boxes_sorted + box * DET_CHANNEL;

    // Channels 0..2 are copied unchanged.
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];

    // Channels 3 and 4 trade places on the way out; channel 5 is copied as-is.
    dst[3] = src[4];
    dst[4] = src[3];
    dst[5] = src[5];

    // Channel 6 is the heading angle. With the two velocity channels removed it is read
    // from input channel 6 (the 11-channel layout read it from channel 8). It is negated
    // and shifted by HALF_PI before being stored.
    dst[6] = -src[6] - HALF_PI;
}
Lidar_AI_Solution/CUDA-CenterPoint/CMakeLists.txt
1、只需要根据自己服务器的架构修改cuda和tensorrt路径即可,如下所示
关于libspconv库
Lidar_AI_Solution工程的readme里提到libspconv库只支持三种平台,如下所示
但是我自己在多台服务器上(NVIDIA平台)部署过相同的代码,都是没啥问题的,根据架构选择libspconv.so即可,如下所示
如果直接使用Lidar_AI_Solution工程里的libspconv.so库有问题的话,Issues里有人给出了libspconv源码,可以自行下载编译:https://github.com/NVIDIA-AI-IOT/Lidar_AI_Solution/issues/281
三、编译运行
编译
cd Lidar_AI_Solution/CUDA-CenterPoint
mkdir build
cd build
cmake ..
make
运行
./centerpoint_infer ../data/test/ --verbose