channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
width_ = bottom[0]->width();
+ // TODO: generalize to handle inputs of different shapes.
+ for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
+ CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num.";
+ CHECK_EQ(channels_, bottom[bottom_id]->channels())
+ << "Inputs must have same channels.";
+ CHECK_EQ(height_, bottom[bottom_id]->height())
+ << "Inputs must have same height.";
+ CHECK_EQ(width_, bottom[bottom_id]->width())
+ << "Inputs must have same width.";
+ }
num_output_ = this->layer_param_.convolution_param().num_output();
CHECK_GT(num_output_, 0);
CHECK_EQ(channels_ % group_, 0);
M_ = num_output_ / group_;
K_ = channels_ * kernel_size_ * kernel_size_ / group_;
N_ = height_out * width_out;
- (*top)[0]->Reshape(bottom[0]->num(), num_output_, height_out, width_out);
+ for (int top_id = 0; top_id < top->size(); ++top_id) {
+ (*top)[top_id]->Reshape(num_, num_output_, height_out, width_out);
+ }
// Check if we need to set up the weights
if (this->blobs_.size() > 0) {
LOG(INFO) << "Skipping parameter initialization";
shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
this->layer_param_.convolution_param().weight_filler()));
weight_filler->Fill(this->blobs_[0].get());
- // If necessary, intiialize and fill the bias term
+ // If necessary, initialize and fill the bias term
if (bias_term_) {
this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, num_output_));
shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
template <typename Dtype>
Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = (*top)[0]->mutable_cpu_data();
- Dtype* col_data = col_buffer_.mutable_cpu_data();
- const Dtype* weight = this->blobs_[0]->cpu_data();
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- for (int n = 0; n < num_; ++n) {
- // First, im2col
- im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
- width_, kernel_size_, pad_, stride_, col_data);
- // Second, innerproduct with groups
- for (int g = 0; g < group_; ++g) {
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
- (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
- (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
- }
- // third, add bias
- if (bias_term_) {
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
- N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
- reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
- (Dtype)1., top_data + (*top)[0]->offset(n));
+ // The col buffer, the weights, and the per-group strides are identical for
+ // every input blob (all inputs share one set of parameters), so set them up
+ // once before the loop over inputs instead of re-deriving them per blob.
+ Dtype* col_data = col_buffer_.mutable_cpu_data();
+ const Dtype* weight = this->blobs_[0]->cpu_data();
+ const int weight_offset = M_ * K_;
+ const int col_offset = K_ * N_;
+ const int top_offset = M_ * N_;
+ // Convolve each bottom blob into the corresponding top blob.
+ for (int i = 0; i < bottom.size(); ++i) {
+ const Dtype* bottom_data = bottom[i]->cpu_data();
+ Dtype* top_data = (*top)[i]->mutable_cpu_data();
+ for (int n = 0; n < num_; ++n) {
+ // First, im2col
+ im2col_cpu(bottom_data + bottom[i]->offset(n), channels_, height_,
+ width_, kernel_size_, pad_, stride_, col_data);
+ // Second, innerproduct with groups
+ for (int g = 0; g < group_; ++g) {
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+ (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
+ }
+ // third, add bias
+ if (bias_term_) {
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+ N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
+ reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
+ (Dtype)1., top_data + (*top)[i]->offset(n));
+ }
}
}
return Dtype(0.);
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* weight = this->blobs_[0]->cpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
- const Dtype* bottom_data = (*bottom)[0]->cpu_data();
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- Dtype* col_data = col_buffer_.mutable_cpu_data();
- Dtype* col_diff = col_buffer_.mutable_cpu_diff();
- // bias gradient if necessary
+ // Zero the weight gradient once, up front: it accumulates across all
+ // (top, bottom) pairs and all images below.
+ memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
Dtype* bias_diff = NULL;
-
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_cpu_diff();
memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
- for (int n = 0; n < num_; ++n) {
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
- 1., top_diff + top[0]->offset(n),
- reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
- bias_diff);
- }
}
+ const int weight_offset = M_ * K_;
+ const int col_offset = K_ * N_;
+ const int top_offset = M_ * N_;
+ for (int i = 0; i < top.size(); ++i) {
+ const Dtype* top_diff = top[i]->cpu_diff();
+ const Dtype* bottom_data = (*bottom)[i]->cpu_data();
+ Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
+ Dtype* col_data = col_buffer_.mutable_cpu_data();
+ Dtype* col_diff = col_buffer_.mutable_cpu_diff();
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
- for (int n = 0; n < num_; ++n) {
- // since we saved memory in the forward pass by not storing all col data,
- // we will need to recompute them.
- im2col_cpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
- width_, kernel_size_, pad_, stride_, col_data);
- // gradient w.r.t. weight. Note that we will accumulate diffs.
- for (int g = 0; g < group_; ++g) {
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
- (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
- col_data + col_offset * g, (Dtype)1.,
- weight_diff + weight_offset * g);
+ // Bias gradient, if necessary.
+ if (bias_term_) {
+ for (int n = 0; n < num_; ++n) {
+ // Index with top[i] (not top[0]): top_diff was taken from top[i]
+ // above, so offsets must come from the same blob.
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
+ 1., top_diff + top[i]->offset(n),
+ static_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
+ bias_diff);
+ }
+ }
}
- // gradient w.r.t. bottom data, if necessary
- if (propagate_down[0]) {
+ for (int n = 0; n < num_; ++n) {
+ // Since we saved memory in the forward pass by not storing all col data,
+ // we will need to recompute them.
+ im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+ width_, kernel_size_, pad_, stride_, col_data);
+ // gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < group_; ++g) {
- caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
- (Dtype)1., weight + weight_offset * g,
- top_diff + top[0]->offset(n) + top_offset * g,
- (Dtype)0., col_diff + col_offset * g);
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+ col_data + col_offset * g, (Dtype)1.,
+ weight_diff + weight_offset * g);
+ }
+ // gradient w.r.t. bottom data, if necessary
+ if (propagate_down[i]) {
+ for (int g = 0; g < group_; ++g) {
+ caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ (Dtype)1., weight + weight_offset * g,
+ top_diff + top[i]->offset(n) + top_offset * g,
+ (Dtype)0., col_diff + col_offset * g);
+ }
+ // col2im back to the data
+ col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+ stride_, bottom_diff + (*bottom)[i]->offset(n));
}
- // col2im back to the data
- col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
- stride_, bottom_diff + (*bottom)[0]->offset(n));
}
}
}
template <typename Dtype>
Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->gpu_data();
- Dtype* top_data = (*top)[0]->mutable_gpu_data();
- Dtype* col_data = col_buffer_.mutable_gpu_data();
- const Dtype* weight = this->blobs_[0]->gpu_data();
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- for (int n = 0; n < num_; ++n) {
- // First, im2col
- im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_,
- width_, kernel_size_, pad_, stride_, col_data);
- // Second, innerproduct with groups
- for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
- (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
- (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
- }
- // third, add bias
- if (bias_term_) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
- N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- (Dtype)1., top_data + (*top)[0]->offset(n));
+ // The col buffer, the weights, and the per-group strides are shared by
+ // every input blob, so set them up once before the loop over inputs
+ // (mirrors the CPU implementation).
+ Dtype* col_data = col_buffer_.mutable_gpu_data();
+ const Dtype* weight = this->blobs_[0]->gpu_data();
+ const int weight_offset = M_ * K_;
+ const int col_offset = K_ * N_;
+ const int top_offset = M_ * N_;
+ // Convolve each bottom blob into the corresponding top blob.
+ for (int i = 0; i < bottom.size(); ++i) {
+ const Dtype* bottom_data = bottom[i]->gpu_data();
+ Dtype* top_data = (*top)[i]->mutable_gpu_data();
+ for (int n = 0; n < num_; ++n) {
+ // First, im2col
+ im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
+ width_, kernel_size_, pad_, stride_, col_data);
+ // Second, innerproduct with groups
+ for (int g = 0; g < group_; ++g) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+ (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
+ }
+ // third, add bias
+ if (bias_term_) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+ N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
+ reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ (Dtype)1., top_data + (*top)[i]->offset(n));
+ }
}
}
return Dtype(0.);
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* weight = this->blobs_[0]->gpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
- const Dtype* bottom_data = (*bottom)[0]->gpu_data();
- Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ // Zero the weight gradient once, up front: it accumulates across all
+ // (top, bottom) pairs and all images below.
+ CUDA_CHECK(cudaMemset(weight_diff, 0,
+ sizeof(Dtype) * this->blobs_[0]->count()));
Dtype* col_data = col_buffer_.mutable_gpu_data();
Dtype* col_diff = col_buffer_.mutable_gpu_diff();
- // bias gradient if necessary
Dtype* bias_diff = NULL;
-
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_gpu_diff();
CUDA_CHECK(cudaMemset(bias_diff, 0,
sizeof(Dtype) * this->blobs_[1]->count()));
- for (int n = 0; n < num_; ++n) {
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
- 1., top_diff + top[0]->offset(n),
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- 1., bias_diff);
- }
}
-
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- CUDA_CHECK(cudaMemset(weight_diff, 0,
- sizeof(Dtype) * this->blobs_[0]->count()));
- for (int n = 0; n < num_; ++n) {
- // since we saved memory in the forward pass by not storing all col data,
- // we will need to recompute them.
- im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
- width_, kernel_size_, pad_, stride_, col_data);
- // gradient w.r.t. weight. Note that we will accumulate diffs.
- for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
- (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
- col_data + col_offset * g, (Dtype)1.,
- weight_diff + weight_offset * g);
+ const int weight_offset = M_ * K_;
+ const int col_offset = K_ * N_;
+ const int top_offset = M_ * N_;
+ for (int i = 0; i < top.size(); ++i) {
+ const Dtype* top_diff = top[i]->gpu_diff();
+ const Dtype* bottom_data = (*bottom)[i]->gpu_data();
+ Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+ // Bias gradient, if necessary.
+ if (bias_term_) {
+ for (int n = 0; n < num_; ++n) {
+ // Index with top[i] (not top[0]): top_diff was taken from top[i]
+ // above, so offsets must come from the same blob.
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
+ 1., top_diff + top[i]->offset(n),
+ static_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ 1., bias_diff);
+ }
}
- // gradient w.r.t. bottom data, if necessary
- if (propagate_down[0]) {
+ for (int n = 0; n < num_; ++n) {
+ // since we saved memory in the forward pass by not storing all col data,
+ // we will need to recompute them.
+ im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+ width_, kernel_size_, pad_, stride_, col_data);
+ // gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
- (Dtype)1., weight + weight_offset * g,
- top_diff + top[0]->offset(n) + top_offset * g,
- (Dtype)0., col_diff + col_offset * g);
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+ col_data + col_offset * g, (Dtype)1.,
+ weight_diff + weight_offset * g);
+ }
+ // gradient w.r.t. bottom data, if necessary
+ if (propagate_down[i]) {
+ for (int g = 0; g < group_; ++g) {
+ caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ (Dtype)1., weight + weight_offset * g,
+ top_diff + top[i]->offset(n) + top_offset * g,
+ (Dtype)0., col_diff + col_offset * g);
+ }
+ // col2im back to the data
+ col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+ stride_, bottom_diff + (*bottom)[i]->offset(n));
}
- // col2im back to the data
- col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
- stride_, bottom_diff + (*bottom)[0]->offset(n));
}
}
}
class ConvolutionLayerTest : public ::testing::Test {
 protected:
  ConvolutionLayerTest()
- : blob_bottom_(new Blob<Dtype>()),
- blob_top_(new Blob<Dtype>()) {}
+ // A second bottom/top pair lets the tests exercise the layer with
+ // multiple inputs sharing a single set of convolution parameters.
+ : blob_bottom_(new Blob<Dtype>(2, 3, 6, 4)),
+ blob_bottom_2_(new Blob<Dtype>(2, 3, 6, 4)),
+ blob_top_(new Blob<Dtype>()),
+ blob_top_2_(new Blob<Dtype>()) {}
  virtual void SetUp() {
- blob_bottom_->Reshape(2, 3, 6, 4);
    // fill the values
    FillerParameter filler_param;
    filler_param.set_value(1.);
    GaussianFiller<Dtype> filler(filler_param);
    filler.Fill(this->blob_bottom_);
+ filler.Fill(this->blob_bottom_2_);
+ // Only the first pair is registered here; tests that want multiple
+ // inputs push blob_bottom_2_ / blob_top_2_ onto the vectors themselves.
    blob_bottom_vec_.push_back(blob_bottom_);
    blob_top_vec_.push_back(blob_top_);
  }
- virtual ~ConvolutionLayerTest() { delete blob_bottom_; delete blob_top_; }
+ virtual ~ConvolutionLayerTest() {
+ delete blob_bottom_;
+ delete blob_bottom_2_;
+ delete blob_top_;
+ delete blob_top_2_;
+ }
+
  Blob<Dtype>* const blob_bottom_;
+ Blob<Dtype>* const blob_bottom_2_;
  Blob<Dtype>* const blob_top_;
+ Blob<Dtype>* const blob_top_2_;
  vector<Blob<Dtype>*> blob_bottom_vec_;
  vector<Blob<Dtype>*> blob_top_vec_;
};
convolution_param->set_kernel_size(3);
convolution_param->set_stride(2);
convolution_param->set_num_output(4);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+ this->blob_top_vec_.push_back(this->blob_top_2_);
shared_ptr<Layer<TypeParam> > layer(
new ConvolutionLayer<TypeParam>(layer_param));
layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
EXPECT_EQ(this->blob_top_->channels(), 4);
EXPECT_EQ(this->blob_top_->height(), 2);
EXPECT_EQ(this->blob_top_->width(), 1);
+ EXPECT_EQ(this->blob_top_2_->num(), 2);
+ EXPECT_EQ(this->blob_top_2_->channels(), 4);
+ EXPECT_EQ(this->blob_top_2_->height(), 2);
+ EXPECT_EQ(this->blob_top_2_->width(), 1);
// setting group should not change the shape
convolution_param->set_num_output(3);
convolution_param->set_group(3);
EXPECT_EQ(this->blob_top_->channels(), 3);
EXPECT_EQ(this->blob_top_->height(), 2);
EXPECT_EQ(this->blob_top_->width(), 1);
+ EXPECT_EQ(this->blob_top_2_->num(), 2);
+ EXPECT_EQ(this->blob_top_2_->channels(), 3);
+ EXPECT_EQ(this->blob_top_2_->height(), 2);
+ EXPECT_EQ(this->blob_top_2_->width(), 1);
}
TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolution) {
  // We will simply see if the convolution layer carries out averaging well.
+ shared_ptr<ConstantFiller<TypeParam> > filler;
  FillerParameter filler_param;
  filler_param.set_value(1.);
- ConstantFiller<TypeParam> filler(filler_param);
- filler.Fill(this->blob_bottom_);
+ filler.reset(new ConstantFiller<TypeParam>(filler_param));
+ filler->Fill(this->blob_bottom_);
+ // Fill the second input with a different constant (2) so the two outputs
+ // are distinguishable.
+ filler_param.set_value(2.);
+ filler.reset(new ConstantFiller<TypeParam>(filler_param));
+ filler->Fill(this->blob_bottom_2_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+ this->blob_top_vec_.push_back(this->blob_top_2_);
  LayerParameter layer_param;
  ConvolutionParameter* convolution_param =
      layer_param.mutable_convolution_param();
  for (int i = 0; i < this->blob_top_->count(); ++i) {
    EXPECT_NEAR(top_data[i], 27.1, 1e-4);
  }
+ // By linearity, the all-2s input presumably yields double the first
+ // input's convolution response (2 * 27 = 54) plus the same 0.1 bias —
+ // layer/filler setup producing the 27.1 baseline is elided in this hunk.
+ top_data = this->blob_top_2_->cpu_data();
+ for (int i = 0; i < this->blob_top_2_->count(); ++i) {
+ EXPECT_NEAR(top_data[i], 54.1, 1e-4);
+ }
}
TYPED_TEST(ConvolutionLayerTest, TestGPUSimpleConvolution) {
  // We will simply see if the convolution layer carries out averaging well.
+ shared_ptr<ConstantFiller<TypeParam> > filler;
  FillerParameter filler_param;
  filler_param.set_value(1.);
- ConstantFiller<TypeParam> filler(filler_param);
- filler.Fill(this->blob_bottom_);
+ filler.reset(new ConstantFiller<TypeParam>(filler_param));
+ filler->Fill(this->blob_bottom_);
+ // Fill the second input with a different constant (2) so the two outputs
+ // are distinguishable.
+ filler_param.set_value(2.);
+ filler.reset(new ConstantFiller<TypeParam>(filler_param));
+ filler->Fill(this->blob_bottom_2_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+ this->blob_top_vec_.push_back(this->blob_top_2_);
  LayerParameter layer_param;
  ConvolutionParameter* convolution_param =
      layer_param.mutable_convolution_param();
  for (int i = 0; i < this->blob_top_->count(); ++i) {
    EXPECT_NEAR(top_data[i], 27.1, 1e-4);
  }
+ // Mirrors the CPU test: the all-2s input presumably yields double the
+ // response (54) plus the shared 0.1 bias — setup elided in this hunk.
+ top_data = this->blob_top_2_->cpu_data();
+ for (int i = 0; i < this->blob_top_2_->count(); ++i) {
+ EXPECT_NEAR(top_data[i], 54.1, 1e-4);
+ }
}
TYPED_TEST(ConvolutionLayerTest, TestCPUSimpleConvolutionGroup) {
LayerParameter layer_param;
ConvolutionParameter* convolution_param =
layer_param.mutable_convolution_param();
+ this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+ this->blob_top_vec_.push_back(this->blob_top_2_);
convolution_param->set_kernel_size(3);
convolution_param->set_stride(2);
convolution_param->set_num_output(2);
LayerParameter layer_param;
ConvolutionParameter* convolution_param =
layer_param.mutable_convolution_param();
+ this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+ this->blob_top_vec_.push_back(this->blob_top_2_);
convolution_param->set_kernel_size(3);
convolution_param->set_stride(2);
convolution_param->set_num_output(2);