CCCP pooling layer by mavenlin · Pull Request #498 · BVLC/caffe · GitHub
CCCP pooling layer #498

Closed · wants to merge 3 commits
41 changes: 41 additions & 0 deletions include/caffe/vision_layers.hpp
@@ -16,6 +16,47 @@

namespace caffe {

/**
* @brief Cascadable Cross Channel Parametric Pooling: a weighted
* recombination of the input channels, equivalent to a 1x1 convolution.
*
* When applied on top of a convolutional layer, it is equivalent to a
* fully connected layer applied to every patch of the underlying input.
* Stacking multiple CCCPLayers (with nonlinearities in between) maps each
* input patch to an output feature vector through a multilayer perceptron.
* Although equivalent to a 1x1 convolution, this layer avoids the im2col
* step of ConvolutionLayer, which is unnecessary for 1x1 kernels.
* Refer to Network in Network [http://arxiv.org/abs/1312.4400].
*/
template <typename Dtype>
class CCCPLayer : public Layer<Dtype> {
public:
explicit CCCPLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

int num_;
int channels_;
int height_;
int width_;
int num_output_;
int group_;
Blob<Dtype> bias_multiplier_;
bool bias_term_;
};

/**
* @brief Convolves the input image with a bank of learned filters,
* and (optionally) adds biases.
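For reference, a minimal standalone sketch (not part of this PR) of what CCCPLayer computes for one image when group_ is 1: every spatial position receives the same linear recombination of the channels, i.e. a 1x1 convolution. The function name and flat-array layout are illustrative; the ordering matches Caffe's num x channels x height x width blobs.

#include <vector>

// top[o][h][w] = bias[o] + sum_c weight[o][c] * bottom[c][h][w]
void cccp_forward_naive(const std::vector<float>& bottom,  // channels * H * W
                        const std::vector<float>& weight,  // num_output * channels
                        const std::vector<float>& bias,    // num_output
                        std::vector<float>* top,           // num_output * H * W
                        int channels, int num_output, int height, int width) {
  const int spatial = height * width;
  for (int o = 0; o < num_output; ++o) {
    for (int s = 0; s < spatial; ++s) {
      float acc = bias[o];
      for (int c = 0; c < channels; ++c) {
        acc += weight[o * channels + c] * bottom[c * spatial + s];
      }
      (*top)[o * spatial + s] = acc;
    }
  }
}

Forward_cpu in cccp_layer.cpp below collapses the o and c loops into one caffe_cpu_gemm of shape (num_output/group x channels/group) times (channels/group x H*W) per group, so no im2col buffer is needed.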
2 changes: 2 additions & 0 deletions src/caffe/layer_factory.cpp
@@ -187,6 +187,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
return new ArgMaxLayer<Dtype>(param);
case LayerParameter_LayerType_BNLL:
return new BNLLLayer<Dtype>(param);
case LayerParameter_LayerType_CCCP:
return new CCCPLayer<Dtype>(param);
case LayerParameter_LayerType_CONCAT:
return new ConcatLayer<Dtype>(param);
case LayerParameter_LayerType_CONVOLUTION:
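A hedged usage sketch of the factory hook added above, assuming the templated GetLayer of this era and the protobuf accessors generated from the new cccp_param field (the num_output value is illustrative):

LayerParameter param;
param.set_type(LayerParameter_LayerType_CCCP);
param.mutable_cccp_param()->set_num_output(96);  // illustrative value
Layer<float>* layer = GetLayer<float>(param);    // dispatches to CCCPLayer<float>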
155 changes: 155 additions & 0 deletions src/caffe/layers/cccp_layer.cpp
@@ -0,0 +1,155 @@
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/im2col.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void CCCPLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
CHECK_EQ(bottom.size(), 1)
<< "CCCP Pooling Layer takes a single blob as input.";
CHECK_EQ(top->size(), 1)
<< "CCCP Pooling Layer takes a single blob as output.";

num_output_ = this->layer_param_.cccp_param().num_output();
group_ = this->layer_param_.cccp_param().group();
bias_term_ = this->layer_param_.cccp_param().bias_term();

// Figure out the dimensions
channels_ = bottom[0]->channels();
width_ = bottom[0]->width();
height_ = bottom[0]->height();
num_ = bottom[0]->num();

CHECK_GT(num_output_, 0);
CHECK_EQ(channels_ % group_, 0);

(*top)[0]->Reshape(bottom[0]->num(), num_output_, height_, width_);

// Check if we need to set up the weights
if (this->blobs_.size() > 0) {
LOG(INFO) << "Skipping parameter initialization";
} else {
if (bias_term_) {
this->blobs_.resize(2);
} else {
this->blobs_.resize(1);
}
// Initialize the weight
this->blobs_[0].reset(new Blob<Dtype>(num_output_,
channels_ / group_, 1, 1));
// fill the weights
shared_ptr<Filler<Dtype> > weight_filler(
GetFiller<Dtype>(this->layer_param_.cccp_param().weight_filler()));
weight_filler->Fill(this->blobs_[0].get());
// If necessary, initialize and fill the bias term
if (bias_term_) {
this->blobs_[1].reset(new Blob<Dtype>(1, num_output_, 1, 1));
shared_ptr<Filler<Dtype> > bias_filler(
GetFiller<Dtype>(this->layer_param_.cccp_param().bias_filler()));
bias_filler->Fill(this->blobs_[1].get());
}
} // parameter initialization
// Setting up the bias multiplier
if (bias_term_) {
bias_multiplier_.Reshape(1, 1, 1, width_ * height_);
caffe_set(width_ * height_, Dtype(1), bias_multiplier_.mutable_cpu_data());
}
this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void CCCPLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
const Dtype* weight = this->blobs_[0]->cpu_data();
const int weight_offset = num_output_ / group_ * channels_ / group_;
const int bottom_group_offset = width_ * height_ * channels_ / group_;
const int top_group_offset = width_ * height_ * num_output_ / group_;

for (int n = 0; n < num_; ++n) {
for (int g = 0; g < group_; ++g) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_ / group_,
width_ * height_, channels_ / group_, (Dtype)1.,
weight + g * weight_offset,
bottom_data + bottom[0]->offset(n) + g * bottom_group_offset,
(Dtype)0., top_data + (*top)[0]->offset(n) + g * top_group_offset);
}
if (bias_term_) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
width_ * height_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
reinterpret_cast<const Dtype*>(bias_multiplier_.cpu_data()),
(Dtype)1., top_data + (*top)[0]->offset(n));
}
}
}

template <typename Dtype>
void CCCPLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* bottom_data = (*bottom)[0]->cpu_data();
const Dtype* weight = this->blobs_[0]->cpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
Dtype* bias_diff = NULL;
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();

const int weight_offset = num_output_ / group_ * channels_ / group_;
const int bottom_group_offset = width_ * height_ * channels_ / group_;
const int top_group_offset = width_ * height_ * num_output_ / group_;

// Gradient with respect to bias
if (bias_term_ && this->param_propagate_down_[1]) {
bias_diff = this->blobs_[1]->mutable_cpu_diff();
caffe_memset(sizeof(Dtype) * this->blobs_[1]->count(), 0, bias_diff);
for (int n = 0; n < num_; ++n) {
caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_,
width_ * height_, (Dtype)1., top_diff + top[0]->offset(n),
reinterpret_cast<const Dtype*>(bias_multiplier_.cpu_data()),
(Dtype)1., bias_diff);
}
}

if (this->param_propagate_down_[0] || propagate_down[0]) {
if (this->param_propagate_down_[0]) {
caffe_memset(sizeof(Dtype) * this->blobs_[0]->count(), 0, weight_diff);
}
for (int n = 0; n < num_; ++n) {
if (this->param_propagate_down_[0]) {
// The gradient will be accumulated
for (int g = 0; g < group_; ++g) {
// Gradient with respect to weight
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, num_output_ / group_,
channels_ / group_, width_ * height_, (Dtype)1.,
top_diff + top[0]->offset(n) + g * top_group_offset,
bottom_data + (*bottom)[0]->offset(n) + g * bottom_group_offset,
(Dtype)1., weight_diff + g * weight_offset);
}
}
if (propagate_down[0]) {
for (int g = 0; g < group_; ++g) {
// Gradient w.r.t. bottom data if necessary
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, channels_ / group_,
width_ * height_, num_output_ / group_, (Dtype)1.,
weight + g * weight_offset,
top_diff + top[0]->offset(n) + g * top_group_offset, (Dtype)0.,
bottom_diff + (*bottom)[0]->offset(n) + g * bottom_group_offset);
}
}
}
}
}

#ifdef CPU_ONLY
STUB_GPU(CCCPLayer);
#endif

INSTANTIATE_CLASS(CCCPLayer);

} // namespace caffe
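As a cross-check on the two GEMMs in Backward_cpu, a naive reference sketch (again assuming group_ is 1; names are illustrative): the weight gradient accumulates, over spatial positions, the outer product of top_diff with bottom_data, and the bottom gradient applies the transposed weight matrix to top_diff.

#include <vector>

// weight_diff[o][c] += sum_s top_diff[o][s] * bottom_data[c][s]
// bottom_diff[c][s]  = sum_o weight[o][c] * top_diff[o][s]
void cccp_backward_naive(const std::vector<float>& top_diff,
                         const std::vector<float>& bottom_data,
                         const std::vector<float>& weight,
                         std::vector<float>* weight_diff,
                         std::vector<float>* bottom_diff,
                         int channels, int num_output, int spatial) {
  for (int o = 0; o < num_output; ++o) {
    for (int c = 0; c < channels; ++c) {
      for (int s = 0; s < spatial; ++s) {
        (*weight_diff)[o * channels + c] +=
            top_diff[o * spatial + s] * bottom_data[c * spatial + s];
      }
    }
  }
  for (int c = 0; c < channels; ++c) {
    for (int s = 0; s < spatial; ++s) {
      float acc = 0;
      for (int o = 0; o < num_output; ++o) {
        acc += weight[o * channels + c] * top_diff[o * spatial + s];
      }
      (*bottom_diff)[c * spatial + s] = acc;
    }
  }
}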
97 changes: 97 additions & 0 deletions src/caffe/layers/cccp_layer.cu
@@ -0,0 +1,97 @@
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/im2col.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void CCCPLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
const Dtype* weight = this->blobs_[0]->gpu_data();
const int weight_offset = num_output_ / group_ * channels_ / group_;
const int bottom_group_offset = width_ * height_ * channels_ / group_;
const int top_group_offset = width_ * height_ * num_output_ / group_;

for (int n = 0; n < num_; ++n) {
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_ / group_,
width_ * height_, channels_ / group_, (Dtype)1.,
weight + g * weight_offset,
bottom_data + bottom[0]->offset(n) + g * bottom_group_offset,
(Dtype)0., top_data + (*top)[0]->offset(n) + g * top_group_offset);
}
if (bias_term_) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
width_ * height_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
reinterpret_cast<const Dtype*>(bias_multiplier_.gpu_data()),
(Dtype)1., top_data + (*top)[0]->offset(n));
}
}
}

template <typename Dtype>
void CCCPLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* bottom_data = (*bottom)[0]->gpu_data();
const Dtype* weight = this->blobs_[0]->gpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
Dtype* bias_diff = NULL;
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();

const int weight_offset = num_output_ / group_ * channels_ / group_;
const int bottom_group_offset = width_ * height_ * channels_ / group_;
const int top_group_offset = width_ * height_ * num_output_ / group_;

// Gradient with respect to bias
if (bias_term_ && this->param_propagate_down_[1]) {
bias_diff = this->blobs_[1]->mutable_gpu_diff();
caffe_gpu_memset(sizeof(Dtype) * this->blobs_[1]->count(), 0, bias_diff);
for (int n = 0; n < num_; ++n) {
caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_,
width_ * height_, (Dtype)1., top_diff + top[0]->offset(n),
reinterpret_cast<const Dtype*>(bias_multiplier_.gpu_data()),
(Dtype)1., bias_diff);
}
}

if (this->param_propagate_down_[0] || propagate_down[0]) {
if (this->param_propagate_down_[0]) {
caffe_gpu_memset(sizeof(Dtype) * this->blobs_[0]->count(),
0, weight_diff);
}
for (int n = 0; n < num_; ++n) {
if (this->param_propagate_down_[0]) {
// The gradient will be accumulated
for (int g = 0; g < group_; ++g) {
// Gradient with respect to weight
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, num_output_ / group_,
channels_ / group_, width_ * height_, (Dtype)1.,
top_diff + top[0]->offset(n) + g * top_group_offset,
bottom_data + (*bottom)[0]->offset(n) + g * bottom_group_offset,
(Dtype)1., weight_diff + g * weight_offset);
}
}
if (propagate_down[0]) {
for (int g = 0; g < group_; ++g) {
// Gradient w.r.t. bottom data if necessary
caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, channels_ / group_,
width_ * height_, num_output_ / group_, (Dtype)1.,
weight + g * weight_offset,
top_diff + top[0]->offset(n) + g * top_group_offset, (Dtype)0.,
bottom_diff + (*bottom)[0]->offset(n) + g * bottom_group_offset);
}
}
}
}
}

INSTANTIATE_CLASS(CCCPLayer);

} // namespace caffe
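One design note on bias_multiplier_, used identically in the CPU and GPU paths: a length H*W vector of ones turns both the bias add and its gradient into BLAS calls (a rank-1 GEMM update forward, a GEMV of row sums backward) instead of hand-written loops. A group-free sketch of what that pair computes per image (float data and names are illustrative):

// Forward:  top (num_output x H*W) += bias * ones^T  (rank-1 update)
// Backward: bias_diff[o] += sum_s top_diff[o][s]     (row sums)
void bias_forward_backward_naive(float* top, float* bias_diff,
                                 const float* bias, const float* top_diff,
                                 int num_output, int spatial) {
  for (int o = 0; o < num_output; ++o) {
    for (int s = 0; s < spatial; ++s) {
      top[o * spatial + s] += bias[o];           // each one contributes bias[o]
      bias_diff[o] += top_diff[o * spatial + s];
    }
  }
}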
15 changes: 13 additions & 2 deletions src/caffe/proto/caffe.proto
@@ -198,7 +198,7 @@ message NetStateRule {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available ID: 40 (last added: softmax_param)
// LayerParameter next available ID: 41 (last added: cccp_param)
message LayerParameter {
repeated string bottom = 2; // the name of the bottom blobs
repeated string top = 3; // the name of the top blobs
@@ -219,7 +219,7 @@ message LayerParameter {
// line above the enum. Update the next available ID when you add a new
// LayerType.
//
// LayerType next available ID: 37 (last added: SILENCE)
// LayerType next available ID: 38 (last added: CCCP)
enum LayerType {
// "NONE" layer type is 0th enum element so that we don't cause confusion
// by defaulting to an existent LayerType (instead, should usually error if
@@ -229,6 +229,7 @@
ACCURACY = 1;
ARGMAX = 30;
BNLL = 2;
CCCP = 37;
CONCAT = 3;
CONVOLUTION = 4;
DATA = 5;
@@ -291,6 +292,7 @@

optional AccuracyParameter accuracy_param = 27;
optional ArgMaxParameter argmax_param = 23;
optional CCCPParameter cccp_param = 40;
optional ConcatParameter concat_param = 9;
optional ConvolutionParameter convolution_param = 10;
optional DataParameter data_param = 11;
@@ -359,6 +361,15 @@
optional uint32 top_k = 2 [default = 1];
}

// Message that stores parameters used by CCCPLayer
message CCCPParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
optional uint32 group = 3 [default = 1]; // group size
optional FillerParameter weight_filler = 4; // The filler for the weight
optional FillerParameter bias_filler = 5; // The filler for the bias
}

// Message that stores parameters used by ConcatLayer
message ConcatParameter {
// Concat Layer needs to specify the dimension along which the concat will happen,
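With these proto additions, a CCCP layer can be dropped into a net definition. A sketch in the proto text format of this era (layer and blob names, and filler settings, are illustrative):

layers {
  name: "cccp1"
  type: CCCP
  bottom: "conv1"
  top: "cccp1"
  cccp_param {
    num_output: 96
    group: 1
    bias_term: true
    weight_filler { type: "gaussian" std: 0.05 }
    bias_filler { type: "constant" value: 0 }
  }
}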