【caffe源码研究】第三章：源码篇(10) ：ConvolutionLayer

最新推荐文章于 2021-04-27 18:03:50 发布

FrankJingle

最新推荐文章于 2021-04-27 18:03:50 发布

阅读量2k

点赞数

CC 4.0 BY-SA版权

分类专栏： Deep Learning Caffe

本文链接：https://blog.csdn.net/fangjin_kl/article/details/54124548

Deep Learning 同时被 2 个专栏收录

46 篇文章

订阅专栏

Caffe

33 篇文章

订阅专栏

本文详细介绍CNN中的卷积层实现原理，包括卷积核配置、权重初始化、正向传播及反向传播过程。并介绍了BaseConvolutionLayer、ConvolutionLayer及CuDNNConvolutionLayer等关键类的功能与实现。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

简介

CNN最经典的几个层，这里介绍一下卷积层。

这里写图片描述

这张图总结的不太全面，但是基本表现出了卷积层的继承关系。

BaseConvolutionLayer

其继承自Layer，是一个卷积以及反卷积操作的基类，首先我们来看BaseConvolutionLayer的LayerSetUp函数

// Excerpt of BaseConvolutionLayer<Dtype>::LayerSetUp (truncated by the
// article): reads the convolution parameters and prepares the blob that holds
// the kernel shape. The remainder of the function is not shown here.
void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
      // This part mainly configures the convolution kernel size, padding,
      // stride and inputs.
      ConvolutionParameter conv_param = this->layer_param_.convolution_param();
      force_nd_im2col_ = conv_param.force_nd_im2col();
      channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis());
      // Every axis after the channel axis is counted as a spatial axis.
      const int first_spatial_axis = channel_axis_ + 1;
      const int num_axes = bottom[0]->num_axes();
      num_spatial_axes_ = num_axes - first_spatial_axis;
      CHECK_GE(num_spatial_axes_, 0);
      // One-element shape vectors: they describe 1-D blobs whose length is
      // (num_spatial_axes_ + 1) and max(num_spatial_axes_, 1) respectively.
      vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
      vector<int> spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1));
      // Set up the kernel dimensions.
      kernel_shape_.Reshape(spatial_dim_blob_shape);
      int* kernel_shape_data = kernel_shape_.mutable_cpu_data();

接着是设置相应的stride dimensions，对于2D，设置在h和w方向上的stride，代码太长列出简要的

pad_.Reshape(spatial_dim_blob_shape);
int* pad_data = pad_.mutable_cpu_data();
pad_data[0] = conv_param.pad_h();
pad_data[1] = conv_param.pad_w();
......一堆if else判断

对于kernel的pad也做相应设置

// Configure the padding dimensions: pad_ is reshaped to the spatial-dimension
// shape, then index 0 receives the height padding and index 1 the width
// padding taken from the convolution parameters.
pad_.Reshape(spatial_dim_blob_shape);
int* pad_data = pad_.mutable_cpu_data();
pad_data[0] = conv_param.pad_h();
pad_data[1] = conv_param.pad_w();

接下来是对weights 和bias设置和填充，其中blob[0]里面存放的是filter weights,而blob[1]里面存放的是biases，当然biases是可选的，也可以没有

// Set up the corresponding shapes (the validation checks mentioned by the
// article are not part of this excerpt).
// The first two weight dimensions are the output channels and the input
// channels handled by a single group.
vector<int> weight_shape(2);
weight_shape[0] = conv_out_channels_;
weight_shape[1] = conv_in_channels_ / group_;

bias_term_ = this->layer_param_.convolution_param().bias_term();
// vector(count, value): bias_term_ is used as the element count, so the shape
// is empty when it is zero/false and {num_output_} when it is one/true.
vector<int> bias_shape(bias_term_, num_output_);

// Allocate the weight blob (blobs_[0]) and initialize it with the filler
// configured in weight_filler.
this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
    this->layer_param_.convolution_param().weight_filler()));
weight_filler->Fill(this->blobs_[0].get());
// Allocate and initialize the bias blob (blobs_[1]) only when a bias term is
// enabled.
if (bias_term_) {
  this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
  shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
      this->layer_param_.convolution_param().bias_filler()));
  bias_filler->Fill(this->blobs_[1].get());
}

ConvolutionLayer

ConvolutionLayer继承了BaseConvolutionLayer，主要作用就是使用学到的filter参数和biases，对一幅image做卷积操作。同时在Caffe里面，卷积操作做了优化，变成了一个矩阵相乘的操作。其中有两个比较主要的函数是im2col以及col2im。
图中上半部分是一个传统卷积，下图是一个矩阵相乘的版本
这里写图片描述

下图是在一个卷积层中将卷积操作展开的具体操作过程：它按照卷积核的大小取数据然后展开，同一张图里不同位置的卷积窗口逐行摆放；不同的N（可以是多个通道）就在同一行后面继续拼接。需要注意的是，同一行里面每一段都应该对应原图中同一个位置的卷积窗口。

这里写图片描述

对于卷积层中的卷积操作，还有一个group的概念要说明一下，group代表filter组的个数。引入group主要是为了选择性地连接卷积层输入端和输出端的channels，否则参数会太多。每一个group只用1/group的input通道和1/group的output通道进行卷积操作。比如group为2，有4个input通道、8个output通道，那么output通道1-4属于第一个group（只与input通道1-2相连），output通道5-8属于第二个group（只与input通道3-4相连）。

ConvolutionLayer里面，主要重写了Forward_cpu和Backward_cpu

// CPU forward pass of the convolution layer.
//
// For every bottom/top blob pair, each of the this->num_ items in the blob is
// processed independently: forward_cpu_gemm computes the convolution of that
// item with the layer weights (blobs_[0]), and, when a bias term is enabled,
// forward_cpu_bias adds the bias (blobs_[1]) to the same output slice.
//
// bottom: input blobs.
// top:    output blobs; top[i] receives the result for bottom[i].
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  for (int i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    for (int n = 0; n < this->num_; ++n) {
      // Item n starts bottom_dim_ / top_dim_ elements after item n - 1.
      Dtype* top_item = top_data + n * this->top_dim_;
      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
          top_item);
      if (this->bias_term_) {
        // blobs_[1] is only touched when a bias term is configured.
        const Dtype* bias = this->blobs_[1]->cpu_data();
        this->forward_cpu_bias(top_item, bias);
      }
    }
  }
}

可以看到其实这里面他调用了forward_cpu_gemm，而这个函数内部又调用了math_function里面的caffe_cpu_gemm的通用矩阵相乘接口，GEMM的全称是General Matrix Matrix Multiply。其基本形式如下：

$$C = \alpha \cdot \mathrm{op}(A) \cdot \mathrm{op}(B) + \beta \cdot C$$

其中 $\mathrm{op}(X)$ 表示 $X$ 本身或其转置，$\alpha$、$\beta$ 为标量系数。

注意，其中this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight, top_data + n * this->top_dim_); 在循环里，this->num_指的是一个batch中图片的个数，bottom_data + n * this->bottom_dim_也就是跳到第n个图来做矩阵运算。

反向传播计算过程是残差与卷积模板的转置做卷积。因此代码类似。

反向传播代码如下。

// CPU backward pass of the convolution layer.
//
// For every top/bottom blob pair this accumulates, item by item:
//   * the bias gradient (when a bias term exists and param_propagate_down_[1]
//     is set),
//   * the weight gradient (when param_propagate_down_[0] is set),
//   * the gradient with respect to the bottom data (when propagate_down[i]
//     is set).
//
// top:            blobs whose diffs carry the incoming gradient.
// propagate_down: per-bottom flags selecting which bottom diffs to compute.
// bottom:         blobs providing the forward inputs and receiving the diffs.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* const filters = this->blobs_[0]->cpu_data();
  Dtype* const filters_grad = this->blobs_[0]->mutable_cpu_diff();
  for (int blob_idx = 0; blob_idx < top.size(); ++blob_idx) {
    const Dtype* const out_grad = top[blob_idx]->cpu_diff();
    const Dtype* const in_data = bottom[blob_idx]->cpu_data();
    Dtype* const in_grad = bottom[blob_idx]->mutable_cpu_diff();

    // Bias gradient, one call per item of the blob.
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* const bias_grad = this->blobs_[1]->mutable_cpu_diff();
      for (int item = 0; item < this->num_; ++item) {
        this->backward_cpu_bias(bias_grad, out_grad + item * this->top_dim_);
      }
    }

    // Nothing else to do for this blob when neither the weight gradient nor
    // the bottom gradient is requested.
    if (!this->param_propagate_down_[0] && !propagate_down[blob_idx]) {
      continue;
    }

    for (int item = 0; item < this->num_; ++item) {
      const Dtype* const item_out_grad = out_grad + item * this->top_dim_;
      // Gradient with respect to the weights.
      if (this->param_propagate_down_[0]) {
        this->weight_cpu_gemm(in_data + item * this->bottom_dim_,
            item_out_grad, filters_grad);
      }
      // Gradient with respect to the bottom data, passed further down.
      if (propagate_down[blob_idx]) {
        this->backward_cpu_gemm(item_out_grad, filters,
            in_grad + item * this->bottom_dim_);
      }
    }
  }
}

CuDNNConvolutionLayer

直接调用了cudnnConvolutionForward来计算前向过程。

// GPU forward pass implemented with cuDNN.
//
// For every bottom/top blob pair and every group g, the convolution is issued
// through cudnnConvolutionForward on handle_[g]; when a bias term is enabled,
// cudnnAddTensor then adds the group's bias slice to the same output slice.
// After all groups of a blob have been issued, an empty kernel is launched to
// synchronize them.
//
// bottom: input blobs.
// top:    output blobs; top[i] receives the result for bottom[i].
template <typename Dtype>
void CuDNNConvolutionLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* weight = this->blobs_[0]->gpu_data();
  for (int i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->gpu_data();
    Dtype* top_data = top[i]->mutable_gpu_data();

    // Forward through cuDNN in parallel over groups.
    for (int g = 0; g < this->group_; g++) {
      // Filters.
      // Scaling factors are `one` for the convolution result and `zero` for
      // the existing contents of the output slice. Each group works on its own
      // slice, selected by the *_offset_ * g pointer arithmetic.
      // NOTE(review): `workspace[g]` has no trailing underscore, unlike the
      // other members used here -- confirm the name against the class
      // declaration.
      CUDNN_CHECK(cudnnConvolutionForward(handle_[g],
            cudnn::dataType<Dtype>::one,
            bottom_descs_[i], bottom_data + bottom_offset_ * g,
            filter_desc_, weight + this->weight_offset_ * g,
            conv_descs_[i],
            fwd_algo_[i], workspace[g], workspace_fwd_sizes_[i],
            cudnn::dataType<Dtype>::zero,
            top_descs_[i], top_data + top_offset_ * g));

      // Bias.
      // Both scaling factors are `one`, i.e. the bias slice is added onto the
      // convolution output that was just written.
      if (this->bias_term_) {
        const Dtype* bias_data = this->blobs_[1]->gpu_data();
        CUDNN_CHECK(cudnnAddTensor(handle_[g],
              cudnn::dataType<Dtype>::one,
              bias_desc_, bias_data + bias_offset_ * g,
              cudnn::dataType<Dtype>::one,
              top_descs_[i], top_data + top_offset_ * g));
      }
    }

    // Synchronize the work across groups, each of which went into its own
    // stream, by launching an empty kernel into the default (null) stream.
    // NOLINT_NEXT_LINE(whitespace/operators)
    sync_conv_groups<<<1, 1>>>();
  }
}

反向过程类似，略过。