From: Jeff Donahue <jeff.donahue@gmail.com>
Date: Thu, 3 Jul 2014 22:33:12 +0000 (-0700)
Subject: ConvolutionLayer can take N bottom blobs and N top blobs
X-Git-Tag: submit/tizen/20180823.020014~653^2~88^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a2b287472d7d85f997d2621bc4a2486c3837f6ba;p=platform%2Fupstream%2Fcaffeonacl.git

ConvolutionLayer can take N bottom blobs and N top blobs
---

diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 9ec8da4..67913bf 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -22,6 +22,16 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   channels_ = bottom[0]->channels();
   height_ = bottom[0]->height();
   width_ = bottom[0]->width();
+  // TODO: generalize to handle inputs of different shapes.
+  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
+    CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num.";
+    CHECK_EQ(channels_, bottom[bottom_id]->channels())
+        << "Inputs must have same channels.";
+    CHECK_EQ(height_, bottom[bottom_id]->height())
+        << "Inputs must have same height.";
+    CHECK_EQ(width_, bottom[bottom_id]->width())
+        << "Inputs must have same width.";
+  }
   num_output_ = this->layer_param_.convolution_param().num_output();
   CHECK_GT(num_output_, 0);
   CHECK_EQ(channels_ % group_, 0);
@@ -39,7 +49,9 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   M_ = num_output_ / group_;
   K_ = channels_ * kernel_size_ * kernel_size_ / group_;
   N_ = height_out * width_out;
-  (*top)[0]->Reshape(bottom[0]->num(), num_output_, height_out, width_out);
+  for (int top_id = 0; top_id < top->size(); ++top_id) {
+    (*top)[top_id]->Reshape(num_, num_output_, height_out, width_out);
+  }
   // Check if we need to set up the weights
   if (this->blobs_.size() > 0) {
     LOG(INFO) << "Skipping parameter initialization";
@@ -56,7 +68,7 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
     shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
         this->layer_param_.convolution_param().weight_filler()));
     weight_filler->Fill(this->blobs_[0].get());
-    // If necessary, intiialize and fill the bias term
+    // If necessary, initialize and fill the bias term
     if (bias_term_) {
       this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, num_output_));
       shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
@@ -79,29 +91,31 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top) {
-  const Dtype* bottom_data = bottom[0]->cpu_data();
-  Dtype* top_data = (*top)[0]->mutable_cpu_data();
-  Dtype* col_data = col_buffer_.mutable_cpu_data();
-  const Dtype* weight = this->blobs_[0]->cpu_data();
-  int weight_offset = M_ * K_;
-  int col_offset = K_ * N_;
-  int top_offset = M_ * N_;
-  for (int n = 0; n < num_; ++n) {
-    // First, im2col
-    im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
-                      width_, kernel_size_, pad_, stride_, col_data);
-    // Second, innerproduct with groups
-    for (int g = 0; g < group_; ++g) {
-      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
-        (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
-        (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
-    }
-    // third, add bias
-    if (bias_term_) {
-      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
-          N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
-          reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
-          (Dtype)1., top_data + (*top)[0]->offset(n));
+  for (int i = 0; i < bottom.size(); ++i) {
+    const Dtype* bottom_data = bottom[i]->cpu_data();
+    Dtype* top_data = (*top)[i]->mutable_cpu_data();
+    Dtype* col_data = col_buffer_.mutable_cpu_data();
+    const Dtype* weight = this->blobs_[0]->cpu_data();
+    int weight_offset = M_ * K_;
+    int col_offset = K_ * N_;
+    int top_offset = M_ * N_;
+    for (int n = 0; n < num_; ++n) {
+      // First, im2col
+      im2col_cpu(bottom_data + bottom[i]->offset(n), channels_, height_,
+                        width_, kernel_size_, pad_, stride_, col_data);
+      // Second, innerproduct with groups
+      for (int g = 0; g < group_; ++g) {
+        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+          (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+          (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
+      }
+      // third, add bias
+      if (bias_term_) {
+        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+            N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
+            reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
+            (Dtype)1., top_data + (*top)[i]->offset(n));
+      }
     }
   }
   return Dtype(0.);
@@ -110,54 +124,60 @@ Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  const Dtype* top_diff = top[0]->cpu_diff();
+  // Weight and bias diffs are zeroed once below and then accumulated over
+  // every (top[i], bottom[i]) pair; the diff of bottom[i] is only written
+  // when propagate_down[i] is set.
   const Dtype* weight = this->blobs_[0]->cpu_data();
   Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
-  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
-  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
-  Dtype* col_data = col_buffer_.mutable_cpu_data();
-  Dtype* col_diff = col_buffer_.mutable_cpu_diff();
-  // bias gradient if necessary
+  memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
   Dtype* bias_diff = NULL;
-
   if (bias_term_) {
     bias_diff = this->blobs_[1]->mutable_cpu_diff();
     memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
-    for (int n = 0; n < num_; ++n) {
-      caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
-          1., top_diff + top[0]->offset(n),
-          reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
-          bias_diff);
-    }
   }
+  const int weight_offset = M_ * K_;
+  const int col_offset = K_ * N_;
+  const int top_offset = M_ * N_;
+  for (int i = 0; i < top.size(); ++i) {
+    const Dtype* top_diff = top[i]->cpu_diff();
+    const Dtype* bottom_data = (*bottom)[i]->cpu_data();
+    Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
+    Dtype* col_data = col_buffer_.mutable_cpu_data();
+    Dtype* col_diff = col_buffer_.mutable_cpu_diff();
 
-  int weight_offset = M_ * K_;
-  int col_offset = K_ * N_;
-  int top_offset = M_ * N_;
-  memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
-  for (int n = 0; n < num_; ++n) {
-    // since we saved memory in the forward pass by not storing all col data,
-    // we will need to recompute them.
-    im2col_cpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
-                      width_, kernel_size_, pad_, stride_, col_data);
-    // gradient w.r.t. weight. Note that we will accumulate diffs.
-    for (int g = 0; g < group_; ++g) {
-      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-        (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
-        col_data + col_offset * g, (Dtype)1.,
-        weight_diff + weight_offset * g);
+    // Bias gradient, if necessary.
+    if (bias_term_) {
+      for (int n = 0; n < num_; ++n) {
+        caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
+            1., top_diff + top[i]->offset(n),
+            static_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
+            bias_diff);
+      }
     }
-    // gradient w.r.t. bottom data, if necessary
-    if (propagate_down[0]) {
+    for (int n = 0; n < num_; ++n) {
+      // Since we saved memory in the forward pass by not storing all col data,
+      // we will need to recompute them.
+      im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+                 width_, kernel_size_, pad_, stride_, col_data);
+      // gradient w.r.t. weight. Note that we will accumulate diffs.
       for (int g = 0; g < group_; ++g) {
-        caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-          (Dtype)1., weight + weight_offset * g,
-          top_diff + top[0]->offset(n) + top_offset * g,
-          (Dtype)0., col_diff + col_offset * g);
+        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+          col_data + col_offset * g, (Dtype)1.,
+          weight_diff + weight_offset * g);
+      }
+      // gradient w.r.t. bottom data, if necessary
+      if (propagate_down[i]) {
+        for (int g = 0; g < group_; ++g) {
+          caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+            (Dtype)1., weight + weight_offset * g,
+            top_diff + top[i]->offset(n) + top_offset * g,
+            (Dtype)0., col_diff + col_offset * g);
+        }
+        // col2im back to the data
+        col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+            stride_, bottom_diff + (*bottom)[i]->offset(n));
       }
-      // col2im back to the data
-      col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
-          stride_, bottom_diff + (*bottom)[0]->offset(n));
     }
   }
 }
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index 85f95fd..71b00c9 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -13,29 +13,31 @@ namespace caffe {
 template <typename Dtype>
 Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top) {
-  const Dtype* bottom_data = bottom[0]->gpu_data();
-  Dtype* top_data = (*top)[0]->mutable_gpu_data();
-  Dtype* col_data = col_buffer_.mutable_gpu_data();
-  const Dtype* weight = this->blobs_[0]->gpu_data();
-  int weight_offset = M_ * K_;
-  int col_offset = K_ * N_;
-  int top_offset = M_ * N_;
-  for (int n = 0; n < num_; ++n) {
-    // First, im2col
-    im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_,
-                      width_, kernel_size_, pad_, stride_, col_data);
-    // Second, innerproduct with groups
-    for (int g = 0; g < group_; ++g) {
-      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
-        (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
-        (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
-    }
-    // third, add bias
-    if (bias_term_) {
-      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
-          N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
-          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-          (Dtype)1., top_data + (*top)[0]->offset(n));
+  for (int i = 0; i < bottom.size(); ++i) {
+    const Dtype* bottom_data = bottom[i]->gpu_data();
+    Dtype* top_data = (*top)[i]->mutable_gpu_data();
+    Dtype* col_data = col_buffer_.mutable_gpu_data();
+    const Dtype* weight = this->blobs_[0]->gpu_data();
+    int weight_offset = M_ * K_;
+    int col_offset = K_ * N_;
+    int top_offset = M_ * N_;
+    for (int n = 0; n < num_; ++n) {
+      // First, im2col
+      im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
+                        width_, kernel_size_, pad_, stride_, col_data);
+      // Second, innerproduct with groups
+      for (int g = 0; g < group_; ++g) {
+        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+          (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+          (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
+      }
+      // third, add bias
+      if (bias_term_) {
+        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+            N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
+            reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+            (Dtype)1., top_data + (*top)[i]->offset(n));
+      }
     }
   }
   return Dtype(0.);
@@ -44,56 +46,61 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  const Dtype* top_diff = top[0]->gpu_diff();
+  // Weight and bias diffs are zeroed once below and then accumulated over
+  // every (top[i], bottom[i]) pair; the diff of bottom[i] is only written
+  // when propagate_down[i] is set.
   const Dtype* weight = this->blobs_[0]->gpu_data();
   Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
-  const Dtype* bottom_data = (*bottom)[0]->gpu_data();
-  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+  CUDA_CHECK(cudaMemset(weight_diff, 0,
+      sizeof(Dtype) * this->blobs_[0]->count()));
   Dtype* col_data = col_buffer_.mutable_gpu_data();
   Dtype* col_diff = col_buffer_.mutable_gpu_diff();
-  // bias gradient if necessary
   Dtype* bias_diff = NULL;
-
   if (bias_term_) {
     bias_diff = this->blobs_[1]->mutable_gpu_diff();
     CUDA_CHECK(cudaMemset(bias_diff, 0,
         sizeof(Dtype) * this->blobs_[1]->count()));
-    for (int n = 0; n < num_; ++n) {
-      caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
-          1., top_diff + top[0]->offset(n),
-          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-          1., bias_diff);
-    }
   }
-
-  int weight_offset = M_ * K_;
-  int col_offset = K_ * N_;
-  int top_offset = M_ * N_;
-  CUDA_CHECK(cudaMemset(weight_diff, 0,
-      sizeof(Dtype) * this->blobs_[0]->count()));
-  for (int n = 0; n < num_; ++n) {
-    // since we saved memory in the forward pass by not storing all col data,
-    // we will need to recompute them.
-    im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
-                      width_, kernel_size_, pad_, stride_, col_data);
-    // gradient w.r.t. weight. Note that we will accumulate diffs.
-    for (int g = 0; g < group_; ++g) {
-      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-        (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
-        col_data + col_offset * g, (Dtype)1.,
-        weight_diff + weight_offset * g);
+  const int weight_offset = M_ * K_;
+  const int col_offset = K_ * N_;
+  const int top_offset = M_ * N_;
+  for (int i = 0; i < top.size(); ++i) {
+    const Dtype* top_diff = top[i]->gpu_diff();
+    const Dtype* bottom_data = (*bottom)[i]->gpu_data();
+    Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+    // Bias gradient, if necessary.
+    if (bias_term_) {
+      for (int n = 0; n < num_; ++n) {
+        caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
+            1., top_diff + top[i]->offset(n),
+            static_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+            1., bias_diff);
+      }
     }
-    // gradient w.r.t. bottom data, if necessary
-    if (propagate_down[0]) {
+    for (int n = 0; n < num_; ++n) {
+      // since we saved memory in the forward pass by not storing all col data,
+      // we will need to recompute them.
+      im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+                        width_, kernel_size_, pad_, stride_, col_data);
+      // gradient w.r.t. weight. Note that we will accumulate diffs.
       for (int g = 0; g < group_; ++g) {
-        caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-          (Dtype)1., weight + weight_offset * g,
-          top_diff + top[0]->offset(n) + top_offset * g,
-          (Dtype)0., col_diff + col_offset * g);
+        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+          col_data + col_offset * g, (Dtype)1.,
+          weight_diff + weight_offset * g);
+      }
+      // gradient w.r.t. bottom data, if necessary
+      if (propagate_down[i]) {
+        for (int g = 0; g < group_; ++g) {
+          caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+            (Dtype)1., weight + weight_offset * g,
+            top_diff + top[i]->offset(n) + top_offset * g,
+            (Dtype)0., col_diff + col_offset * g);
+        }
+        // col2im back to the data
+        col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+            stride_, bottom_diff + (*bottom)[i]->offset(n));
       }
-      // col2im back to the data
-      col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
-          stride_, bottom_diff + (*bottom)[0]->offset(n));
     }
   }
 }
diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp
index b08486e..f740101 100644
--- a/src/caffe/test/test_convolution_layer.cpp
+++ b/src/caffe/test/test_convolution_layer.cpp
@@ -21,22 +21,32 @@ template <typename Dtype>
 class ConvolutionLayerTest : public ::testing::Test {
  protected:
   ConvolutionLayerTest()
-      : blob_bottom_(new Blob<Dtype>()),
-        blob_top_(new Blob<Dtype>()) {}
+      : blob_bottom_(new Blob<Dtype>(2, 3, 6, 4)),
+        blob_bottom_2_(new Blob<Dtype>(2, 3, 6, 4)),
+        blob_top_(new Blob<Dtype>()),
+        blob_top_2_(new Blob<Dtype>()) {}
   virtual void SetUp() {
-    blob_bottom_->Reshape(2, 3, 6, 4);
     // fill the values
     FillerParameter filler_param;
     filler_param.set_value(1.);
     GaussianFiller<Dtype> filler(filler_param);
     filler.Fill(this->blob_bottom_);
+    filler.Fill(this->blob_bottom_2_);
     blob_bottom_vec_.push_back(blob_bottom_);
     blob_top_vec_.push_back(blob_top_);
   }
 
-  virtual ~ConvolutionLayerTest() { delete blob_bottom_; delete blob_top_; }
+  virtual ~ConvolutionLayerTest() {
+    delete blob_bottom_;
+    delete blob_bottom_2_;
+    delete blob_top_;
+    delete blob_top_2_;
+  }
+
   Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_bottom_2_;
   Blob<Dtype>* const blob_top_;
+  Blob<Dtype>* const blob_top_2_;
   vector<Blob<Dtype>*> blob_bottom_vec_;
   vector<Blob<Dtype>*> blob_top_vec_;
 };
@@ -51,6 +61,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) {
   convolution_param->set_kernel_size(3);
   convolution_param->set_stride(2);
   convolution_param->set_num_output(4);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
   shared_ptr<Layer<TypeParam> > layer(
       new ConvolutionLayer<TypeParam>(layer_param));
   layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
@@ -58,6 +70,10 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) {
   EXPECT_EQ(this->blob_top_->channels(), 4);
   EXPECT_EQ(this->blob_top_->height(), 2);
   EXPECT_EQ(this->blob_top_->width(), 1);
+  EXPECT_EQ(this->blob_top_2_->num(), 2);
+  EXPECT_EQ(this->blob_top_2_->channels(), 4);
+  EXPECT_EQ(this->blob_top_2_->height(), 2);
+  EXPECT_EQ(this->blob_top_2_->width(), 1);
   // setting group should not change the shape
   convolution_param->set_num_output(3);
   convolution_param->set_group(3);
@@ -67,14 +83,24 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) {
   EXPECT_EQ(this->blob_top_->channels(), 3);
   EXPECT_EQ(this->blob_top_->height(), 2);
   EXPECT_EQ(this->blob_top_->width(), 1);
+  EXPECT_EQ(this->blob_top_2_->num(), 2);
+  EXPECT_EQ(this->blob_top_2_->channels(), 3);
+  EXPECT_EQ(this->blob_top_2_->height(), 2);
+  EXPECT_EQ(this->blob_top_2_->width(), 1);
 }
 
 TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolution) {
   // We will simply see if the convolution layer carries out averaging well.
+  shared_ptr<ConstantFiller<TypeParam> > filler;
   FillerParameter filler_param;
   filler_param.set_value(1.);
-  ConstantFiller<TypeParam> filler(filler_param);
-  filler.Fill(this->blob_bottom_);
+  filler.reset(new ConstantFiller<TypeParam>(filler_param));
+  filler->Fill(this->blob_bottom_);
+  filler_param.set_value(2.);
+  filler.reset(new ConstantFiller<TypeParam>(filler_param));
+  filler->Fill(this->blob_bottom_2_);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
   LayerParameter layer_param;
   ConvolutionParameter* convolution_param =
       layer_param.mutable_convolution_param();
@@ -95,14 +121,24 @@ TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolution) {
   for (int i = 0; i < this->blob_top_->count(); ++i) {
     EXPECT_NEAR(top_data[i], 27.1, 1e-4);
   }
+  top_data = this->blob_top_2_->cpu_data();
+  for (int i = 0; i < this->blob_top_2_->count(); ++i) {
+    EXPECT_NEAR(top_data[i], 54.1, 1e-4);
+  }
 }
 
 TYPED_TEST(ConvolutionLayerTest, TestGPUSimpleConvolution) {
   // We will simply see if the convolution layer carries out averaging well.
+  shared_ptr<ConstantFiller<TypeParam> > filler;
   FillerParameter filler_param;
   filler_param.set_value(1.);
-  ConstantFiller<TypeParam> filler(filler_param);
-  filler.Fill(this->blob_bottom_);
+  filler.reset(new ConstantFiller<TypeParam>(filler_param));
+  filler->Fill(this->blob_bottom_);
+  filler_param.set_value(2.);
+  filler.reset(new ConstantFiller<TypeParam>(filler_param));
+  filler->Fill(this->blob_bottom_2_);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
   LayerParameter layer_param;
   ConvolutionParameter* convolution_param =
       layer_param.mutable_convolution_param();
@@ -123,6 +159,10 @@ TYPED_TEST(ConvolutionLayerTest, TestGPUSimpleConvolution) {
   for (int i = 0; i < this->blob_top_->count(); ++i) {
     EXPECT_NEAR(top_data[i], 27.1, 1e-4);
   }
+  top_data = this->blob_top_2_->cpu_data();
+  for (int i = 0; i < this->blob_top_2_->count(); ++i) {
+    EXPECT_NEAR(top_data[i], 54.1, 1e-4);
+  }
 }
 
 TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolutionGroup) {
@@ -223,6 +263,8 @@ TYPED_TEST(ConvolutionLayerTest, TestCPUGradient) {
   LayerParameter layer_param;
   ConvolutionParameter* convolution_param =
       layer_param.mutable_convolution_param();
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
   convolution_param->set_kernel_size(3);
   convolution_param->set_stride(2);
   convolution_param->set_num_output(2);
@@ -256,6 +298,8 @@ TYPED_TEST(ConvolutionLayerTest, TestGPUGradient) {
   LayerParameter layer_param;
   ConvolutionParameter* convolution_param =
       layer_param.mutable_convolution_param();
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
   convolution_param->set_kernel_size(3);
   convolution_param->set_stride(2);
   convolution_param->set_num_output(2);