From: Jeff Donahue Date: Thu, 3 Jul 2014 22:33:12 +0000 (-0700) Subject: ConvolutionLayer can take N bottom blobs and N top blobs X-Git-Tag: submit/tizen/20180823.020014~653^2~88^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a2b287472d7d85f997d2621bc4a2486c3837f6ba;p=platform%2Fupstream%2Fcaffeonacl.git ConvolutionLayer can take N bottom blobs and N top blobs --- diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 9ec8da4..67913bf 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -22,6 +22,16 @@ void ConvolutionLayer::SetUp(const vector*>& bottom, channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); + // TODO: generalize to handle inputs of different shapes. + for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) { + CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num."; + CHECK_EQ(channels_, bottom[bottom_id]->channels()) + << "Inputs must have same channels."; + CHECK_EQ(height_, bottom[bottom_id]->height()) + << "Inputs must have same height."; + CHECK_EQ(width_, bottom[bottom_id]->width()) + << "Inputs must have same width."; + } num_output_ = this->layer_param_.convolution_param().num_output(); CHECK_GT(num_output_, 0); CHECK_EQ(channels_ % group_, 0); @@ -39,7 +49,9 @@ void ConvolutionLayer::SetUp(const vector*>& bottom, M_ = num_output_ / group_; K_ = channels_ * kernel_size_ * kernel_size_ / group_; N_ = height_out * width_out; - (*top)[0]->Reshape(bottom[0]->num(), num_output_, height_out, width_out); + for (int top_id = 0; top_id < top->size(); ++top_id) { + (*top)[top_id]->Reshape(num_, num_output_, height_out, width_out); + } // Check if we need to set up the weights if (this->blobs_.size() > 0) { LOG(INFO) << "Skipping parameter initialization"; @@ -56,7 +68,7 @@ void ConvolutionLayer::SetUp(const vector*>& bottom, shared_ptr > weight_filler(GetFiller( 
this->layer_param_.convolution_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); - // If necessary, intiialize and fill the bias term + // If necessary, initialize and fill the bias term if (bias_term_) { this->blobs_[1].reset(new Blob(1, 1, 1, num_output_)); shared_ptr > bias_filler(GetFiller( @@ -79,29 +91,31 @@ void ConvolutionLayer::SetUp(const vector*>& bottom, template Dtype ConvolutionLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - Dtype* col_data = col_buffer_.mutable_cpu_data(); - const Dtype* weight = this->blobs_[0]->cpu_data(); - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - for (int n = 0; n < num_; ++n) { - // First, im2col - im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, - width_, kernel_size_, pad_, stride_, col_data); - // Second, innerproduct with groups - for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, - (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, - (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); - } - // third, add bias - if (bias_term_) { - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(), - reinterpret_cast(bias_multiplier_->cpu_data()), - (Dtype)1., top_data + (*top)[0]->offset(n)); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->cpu_data(); + Dtype* top_data = (*top)[i]->mutable_cpu_data(); + Dtype* col_data = col_buffer_.mutable_cpu_data(); + const Dtype* weight = this->blobs_[0]->cpu_data(); + int weight_offset = M_ * K_; + int col_offset = K_ * N_; + int top_offset = M_ * N_; + for (int n = 0; n < num_; ++n) { + // First, im2col + im2col_cpu(bottom_data + bottom[i]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); + // Second, innerproduct with 
groups + for (int g = 0; g < group_; ++g) { + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, + (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, + (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g); + } + // third, add bias + if (bias_term_) { + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, + N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(), + reinterpret_cast(bias_multiplier_->cpu_data()), + (Dtype)1., top_data + (*top)[i]->offset(n)); + } } } return Dtype(0.); @@ -110,54 +124,57 @@ Dtype ConvolutionLayer::Forward_cpu(const vector*>& bottom, template void ConvolutionLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, vector*>* bottom) { - const Dtype* top_diff = top[0]->cpu_diff(); const Dtype* weight = this->blobs_[0]->cpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); - const Dtype* bottom_data = (*bottom)[0]->cpu_data(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - Dtype* col_data = col_buffer_.mutable_cpu_data(); - Dtype* col_diff = col_buffer_.mutable_cpu_diff(); - // bias gradient if necessary + memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); Dtype* bias_diff = NULL; - if (bias_term_) { bias_diff = this->blobs_[1]->mutable_cpu_diff(); memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count()); - for (int n = 0; n < num_; ++n) { - caffe_cpu_gemv(CblasNoTrans, num_output_, N_, - 1., top_diff + top[0]->offset(n), - reinterpret_cast(bias_multiplier_->cpu_data()), 1., - bias_diff); - } } + const int weight_offset = M_ * K_; + const int col_offset = K_ * N_; + const int top_offset = M_ * N_; + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->cpu_diff(); + const Dtype* bottom_data = (*bottom)[i]->cpu_data(); + Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff(); + Dtype* col_data = col_buffer_.mutable_cpu_data(); + Dtype* col_diff = col_buffer_.mutable_cpu_diff(); - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - 
int top_offset = M_ * N_; - memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); - for (int n = 0; n < num_; ++n) { - // since we saved memory in the forward pass by not storing all col data, - // we will need to recompute them. - im2col_cpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_, - width_, kernel_size_, pad_, stride_, col_data); - // gradient w.r.t. weight. Note that we will accumulate diffs. - for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, - (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, - col_data + col_offset * g, (Dtype)1., - weight_diff + weight_offset * g); + // Bias gradient, if necessary. + if (bias_term_) { + for (int n = 0; n < num_; ++n) { + caffe_cpu_gemv(CblasNoTrans, num_output_, N_, + 1., top_diff + top[0]->offset(n), + static_cast(bias_multiplier_->cpu_data()), 1., + bias_diff); + } } - // gradient w.r.t. bottom data, if necessary - if (propagate_down[0]) { + for (int n = 0; n < num_; ++n) { + // Since we saved memory in the forward pass by not storing all col data, + // we will need to recompute them. + im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); + // gradient w.r.t. weight. Note that we will accumulate diffs. for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, - (Dtype)1., weight + weight_offset * g, - top_diff + top[0]->offset(n) + top_offset * g, - (Dtype)0., col_diff + col_offset * g); + caffe_cpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, + (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g, + col_data + col_offset * g, (Dtype)1., + weight_diff + weight_offset * g); + } + // gradient w.r.t. 
bottom data, if necessary + if (propagate_down[i]) { + for (int g = 0; g < group_; ++g) { + caffe_cpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, + (Dtype)1., weight + weight_offset * g, + top_diff + top[i]->offset(n) + top_offset * g, + (Dtype)0., col_diff + col_offset * g); + } + // col2im back to the data + col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_, + stride_, bottom_diff + (*bottom)[i]->offset(n)); } - // col2im back to the data - col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_, - stride_, bottom_diff + (*bottom)[0]->offset(n)); } } } diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index 85f95fd..71b00c9 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -13,29 +13,31 @@ namespace caffe { template Dtype ConvolutionLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = (*top)[0]->mutable_gpu_data(); - Dtype* col_data = col_buffer_.mutable_gpu_data(); - const Dtype* weight = this->blobs_[0]->gpu_data(); - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - for (int n = 0; n < num_; ++n) { - // First, im2col - im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, - width_, kernel_size_, pad_, stride_, col_data); - // Second, innerproduct with groups - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, - (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, - (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); - } - // third, add bias - if (bias_term_) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), - reinterpret_cast(bias_multiplier_->gpu_data()), - (Dtype)1., top_data + (*top)[0]->offset(n)); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->gpu_data(); + Dtype* top_data = 
(*top)[i]->mutable_gpu_data(); + Dtype* col_data = col_buffer_.mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + int weight_offset = M_ * K_; + int col_offset = K_ * N_; + int top_offset = M_ * N_; + for (int n = 0; n < num_; ++n) { + // First, im2col + im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); + // Second, innerproduct with groups + for (int g = 0; g < group_; ++g) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, + (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, + (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g); + } + // third, add bias + if (bias_term_) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, + N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), + reinterpret_cast(bias_multiplier_->gpu_data()), + (Dtype)1., top_data + (*top)[i]->offset(n)); + } } } return Dtype(0.); @@ -44,56 +46,58 @@ Dtype ConvolutionLayer::Forward_gpu(const vector*>& bottom, template void ConvolutionLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, vector*>* bottom) { - const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* weight = this->blobs_[0]->gpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); - const Dtype* bottom_data = (*bottom)[0]->gpu_data(); - Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); + CUDA_CHECK(cudaMemset(weight_diff, 0, + sizeof(Dtype) * this->blobs_[0]->count())); Dtype* col_data = col_buffer_.mutable_gpu_data(); Dtype* col_diff = col_buffer_.mutable_gpu_diff(); - // bias gradient if necessary Dtype* bias_diff = NULL; - if (bias_term_) { bias_diff = this->blobs_[1]->mutable_gpu_diff(); CUDA_CHECK(cudaMemset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count())); - for (int n = 0; n < num_; ++n) { - caffe_gpu_gemv(CblasNoTrans, num_output_, N_, - 1., top_diff + top[0]->offset(n), - reinterpret_cast(bias_multiplier_->gpu_data()), - 1., bias_diff); - } } - - int 
weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - CUDA_CHECK(cudaMemset(weight_diff, 0, - sizeof(Dtype) * this->blobs_[0]->count())); - for (int n = 0; n < num_; ++n) { - // since we saved memory in the forward pass by not storing all col data, - // we will need to recompute them. - im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_, - width_, kernel_size_, pad_, stride_, col_data); - // gradient w.r.t. weight. Note that we will accumulate diffs. - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, - (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, - col_data + col_offset * g, (Dtype)1., - weight_diff + weight_offset * g); + const int weight_offset = M_ * K_; + const int col_offset = K_ * N_; + const int top_offset = M_ * N_; + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->gpu_diff(); + const Dtype* bottom_data = (*bottom)[i]->gpu_data(); + Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff(); + // Bias gradient, if necessary. + if (bias_term_) { + for (int n = 0; n < num_; ++n) { + caffe_gpu_gemv(CblasNoTrans, num_output_, N_, + 1., top_diff + top[0]->offset(n), + static_cast(bias_multiplier_->gpu_data()), + 1., bias_diff); + } } - // gradient w.r.t. bottom data, if necessary - if (propagate_down[0]) { + for (int n = 0; n < num_; ++n) { + // since we saved memory in the forward pass by not storing all col data, + // we will need to recompute them. + im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); + // gradient w.r.t. weight. Note that we will accumulate diffs. 
for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, - (Dtype)1., weight + weight_offset * g, - top_diff + top[0]->offset(n) + top_offset * g, - (Dtype)0., col_diff + col_offset * g); + caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, + (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g, + col_data + col_offset * g, (Dtype)1., + weight_diff + weight_offset * g); + } + // gradient w.r.t. bottom data, if necessary + if (propagate_down[i]) { + for (int g = 0; g < group_; ++g) { + caffe_gpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, + (Dtype)1., weight + weight_offset * g, + top_diff + top[i]->offset(n) + top_offset * g, + (Dtype)0., col_diff + col_offset * g); + } + // col2im back to the data + col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_, + stride_, bottom_diff + (*bottom)[i]->offset(n)); } - // col2im back to the data - col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_, - stride_, bottom_diff + (*bottom)[0]->offset(n)); } } } diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index b08486e..f740101 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -21,22 +21,32 @@ template class ConvolutionLayerTest : public ::testing::Test { protected: ConvolutionLayerTest() - : blob_bottom_(new Blob()), - blob_top_(new Blob()) {} + : blob_bottom_(new Blob(2, 3, 6, 4)), + blob_bottom_2_(new Blob(2, 3, 6, 4)), + blob_top_(new Blob()), + blob_top_2_(new Blob()) {} virtual void SetUp() { - blob_bottom_->Reshape(2, 3, 6, 4); // fill the values FillerParameter filler_param; filler_param.set_value(1.); GaussianFiller filler(filler_param); filler.Fill(this->blob_bottom_); + filler.Fill(this->blob_bottom_2_); blob_bottom_vec_.push_back(blob_bottom_); blob_top_vec_.push_back(blob_top_); } - virtual ~ConvolutionLayerTest() { delete blob_bottom_; delete blob_top_; } + virtual ~ConvolutionLayerTest() { + 
delete blob_bottom_; + delete blob_bottom_2_; + delete blob_top_; + delete blob_top_2_; + } + Blob<Dtype>* const blob_bottom_; + Blob<Dtype>* const blob_bottom_2_; + Blob<Dtype>* const blob_top_; + Blob<Dtype>* const blob_top_2_; vector<Blob<Dtype>*> blob_bottom_vec_; vector<Blob<Dtype>*> blob_top_vec_; }; @@ -51,6 +61,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) { convolution_param->set_kernel_size(3); convolution_param->set_stride(2); convolution_param->set_num_output(4); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); shared_ptr<Layer<TypeParam> > layer( new ConvolutionLayer<TypeParam>(layer_param)); layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -58,6 +70,10 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) { EXPECT_EQ(this->blob_top_->channels(), 4); EXPECT_EQ(this->blob_top_->height(), 2); EXPECT_EQ(this->blob_top_->width(), 1); + EXPECT_EQ(this->blob_top_2_->num(), 2); + EXPECT_EQ(this->blob_top_2_->channels(), 4); + EXPECT_EQ(this->blob_top_2_->height(), 2); + EXPECT_EQ(this->blob_top_2_->width(), 1); // setting group should not change the shape convolution_param->set_num_output(3); convolution_param->set_group(3); @@ -67,14 +83,24 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) { EXPECT_EQ(this->blob_top_->channels(), 3); EXPECT_EQ(this->blob_top_->height(), 2); EXPECT_EQ(this->blob_top_->width(), 1); + EXPECT_EQ(this->blob_top_2_->num(), 2); + EXPECT_EQ(this->blob_top_2_->channels(), 3); + EXPECT_EQ(this->blob_top_2_->height(), 2); + EXPECT_EQ(this->blob_top_2_->width(), 1); } TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolution) { // We will simply see if the convolution layer carries out averaging well. 
+ shared_ptr<ConstantFiller<TypeParam> > filler; FillerParameter filler_param; filler_param.set_value(1.); - ConstantFiller<TypeParam> filler(filler_param); - filler.Fill(this->blob_bottom_); + filler.reset(new ConstantFiller<TypeParam>(filler_param)); + filler->Fill(this->blob_bottom_); + filler_param.set_value(2.); + filler.reset(new ConstantFiller<TypeParam>(filler_param)); + filler->Fill(this->blob_bottom_2_); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); @@ -95,14 +121,24 @@ TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolution) { for (int i = 0; i < this->blob_top_->count(); ++i) { EXPECT_NEAR(top_data[i], 27.1, 1e-4); } + top_data = this->blob_top_2_->cpu_data(); + for (int i = 0; i < this->blob_top_2_->count(); ++i) { + EXPECT_NEAR(top_data[i], 54.1, 1e-4); + } } TYPED_TEST(ConvolutionLayerTest, TestGPUSimpleConvolution) { // We will simply see if the convolution layer carries out averaging well. 
+ shared_ptr<ConstantFiller<TypeParam> > filler; FillerParameter filler_param; filler_param.set_value(1.); - ConstantFiller<TypeParam> filler(filler_param); - filler.Fill(this->blob_bottom_); + filler.reset(new ConstantFiller<TypeParam>(filler_param)); + filler->Fill(this->blob_bottom_); + filler_param.set_value(2.); + filler.reset(new ConstantFiller<TypeParam>(filler_param)); + filler->Fill(this->blob_bottom_2_); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); @@ -123,6 +159,10 @@ TYPED_TEST(ConvolutionLayerTest, TestGPUSimpleConvolution) { for (int i = 0; i < this->blob_top_->count(); ++i) { EXPECT_NEAR(top_data[i], 27.1, 1e-4); } + top_data = this->blob_top_2_->cpu_data(); + for (int i = 0; i < this->blob_top_2_->count(); ++i) { + EXPECT_NEAR(top_data[i], 54.1, 1e-4); + } } TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolutionGroup) { @@ -223,6 +263,8 @@ TYPED_TEST(ConvolutionLayerTest, TestCPUGradient) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); convolution_param->set_kernel_size(3); convolution_param->set_stride(2); convolution_param->set_num_output(2); @@ -256,6 +298,8 @@ TYPED_TEST(ConvolutionLayerTest, TestGPUGradient) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); convolution_param->set_kernel_size(3); convolution_param->set_stride(2); convolution_param->set_num_output(2);