allow in place computation of SplitLayer 0th top blob
author: Jeff Donahue <jeff.donahue@gmail.com>
Sun, 16 Feb 2014 00:28:44 +0000 (16:28 -0800)
committer: Evan Shelhamer <shelhamer@imaginarynumber.net>
Wed, 26 Feb 2014 22:41:27 +0000 (14:41 -0800)
src/caffe/layers/split_layer.cpp
src/caffe/test/test_split_layer.cpp

index cdfd539..5accdd0 100644 (file)
@@ -15,6 +15,12 @@ void SplitLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   CHECK_GE(top->size(), 1) << "Split Layer takes at least one blob as output.";
   count_ = bottom[0]->count();
   for (int i = 0; i < top->size(); ++i) {
+    // Allow the 0th top blob to be 'in-place', but no others.
+    if (i == 0 && (*top)[i] == bottom[0]) {
+      continue;
+    } else {
+      CHECK_NE((*top)[i], bottom[0]) << "Only 0th top blob may be in place.";
+    }
     (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
                        bottom[0]->height(), bottom[0]->width());
     CHECK_EQ(count_, (*top)[i]->count());
@@ -26,6 +32,9 @@ void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top) {
   const Dtype* bottom_data = bottom[0]->cpu_data();
   for (int i = 0; i < top->size(); ++i) {
+    if (i == 0 && (*top)[i] == bottom[0]) {
+      continue;
+    }
     Dtype* top_data = (*top)[i]->mutable_cpu_data();
     caffe_copy(count_, bottom_data, top_data);
   }
@@ -36,6 +45,9 @@ void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top) {
   const Dtype* bottom_data = bottom[0]->gpu_data();
   for (int i = 0; i < top->size(); ++i) {
+    if (i == 0 && (*top)[i] == bottom[0]) {
+      continue;
+    }
     Dtype* top_data = (*top)[i]->mutable_gpu_data();
     caffe_gpu_copy(count_, bottom_data, top_data);
   }
@@ -47,7 +59,13 @@ Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
   if (propagate_down) {
     const Dtype* top_diff = top[0]->cpu_diff();
     Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
-    caffe_copy(count_, top_diff, bottom_diff);
+    // Initialize by copying first top blob diff to our diff, unless we're
+    // doing in-place computation for the first blob, in which case the diff is
+    // already initialized.
+    if (top[0] != (*bottom)[0]) {
+      caffe_copy(count_, top_diff, bottom_diff);
+    }
+    // Add remaining top blob diffs.
     for (int i = 1; i < top.size(); ++i) {
       top_diff = top[i]->cpu_diff();
       caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
@@ -63,7 +81,13 @@ Dtype SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   if (propagate_down) {
     const Dtype* top_diff = top[0]->gpu_diff();
     Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
-    caffe_gpu_copy(count_, top_diff, bottom_diff);
+    // Initialize by copying first top blob diff to our diff, unless we're
+    // doing in-place computation for the first blob, in which case the diff is
+    // already initialized.
+    if (top[0] != (*bottom)[0]) {
+      caffe_gpu_copy(count_, top_diff, bottom_diff);
+    }
+    // Add remaining top blob diffs.
     for (int i = 1; i < top.size(); ++i) {
       top_diff = top[i]->gpu_diff();
       caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
index a3252b2..f6d5d52 100644 (file)
@@ -88,6 +88,32 @@ TYPED_TEST(SplitLayerTest, TestGPU) {
   }
 }
 
+TYPED_TEST(SplitLayerTest, TestCPUInPlace) {
+  LayerParameter layer_param;
+  SplitLayer<TypeParam> layer(layer_param);
+  Caffe::set_mode(Caffe::CPU);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    TypeParam bottom_value = this->blob_bottom_->cpu_data()[i];
+    EXPECT_EQ(bottom_value, this->blob_top_b_->cpu_data()[i]);
+  }
+}
+
+TYPED_TEST(SplitLayerTest, TestGPUInPlace) {
+  LayerParameter layer_param;
+  SplitLayer<TypeParam> layer(layer_param);
+  Caffe::set_mode(Caffe::GPU);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    TypeParam bottom_value = this->blob_bottom_->cpu_data()[i];
+    EXPECT_EQ(bottom_value, this->blob_top_b_->cpu_data()[i]);
+  }
+}
+
 TYPED_TEST(SplitLayerTest, TestCPUGradient) {
   LayerParameter layer_param;
   Caffe::set_mode(Caffe::CPU);
@@ -108,6 +134,28 @@ TYPED_TEST(SplitLayerTest, TestGPUGradient) {
       this->blob_top_vec_);
 }
 
+TYPED_TEST(SplitLayerTest, TestCPUGradientInPlace) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  SplitLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(SplitLayerTest, TestGPUGradientInPlace) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::GPU);
+  SplitLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
 
 template <typename Dtype>
 class SplitLayerInsertionTest : public ::testing::Test {