Disallow in-place computation in SPLIT layer -- has strange effects in
backward pass when input into a loss.

author     Jeff Donahue <jeff.donahue@gmail.com>
           Sat, 12 Jul 2014 05:52:02 +0000 (22:52 -0700)
committer  Jeff Donahue <jeff.donahue@gmail.com>
           Wed, 13 Aug 2014 20:22:09 +0000 (13:22 -0700)
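
Before this change, InsertSplits emitted SPLIT layers whose 0th top reused
the bottom blob's name, so the first split output was computed 'in-place'.
After this change every split output gets a uniquely named blob. For example,
reassembled from the updated test expectations below:

    layers: {
      name: 'data_data_0_split'
      type: SPLIT
      bottom: 'data'
      top: 'data_data_0_split_0'   # was: top: 'data' (in-place)
      top: 'data_data_0_split_1'
    }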

include/caffe/vision_layers.hpp
src/caffe/layers/lrn_layer.cpp
src/caffe/layers/split_layer.cpp
src/caffe/layers/split_layer.cu
src/caffe/test/test_split_layer.cpp
src/caffe/util/insert_splits.cpp

include/caffe/vision_layers.hpp
index 31df136..d2d9ab3 100644 (file)
@@ -230,7 +230,7 @@ class LRNLayer : public Layer<Dtype> {
   Blob<Dtype> power_output_;
   vector<Blob<Dtype>*> power_top_vec_;
   shared_ptr<EltwiseLayer<Dtype> > product_layer_;
-  Blob<Dtype> product_data_input_;
+  Blob<Dtype> product_input_;
   vector<Blob<Dtype>*> product_bottom_vec_;
 };
 
src/caffe/layers/lrn_layer.cpp
index c76ca95..e81a32b 100644 (file)
@@ -26,7 +26,7 @@ void LRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     {
       // Set up split_layer_ to use inputs in the numerator and denominator.
       split_top_vec_.clear();
-      split_top_vec_.push_back(bottom[0]);
+      split_top_vec_.push_back(&product_input_);
       split_top_vec_.push_back(&square_input_);
       LayerParameter split_param;
       split_layer_.reset(new SplitLayer<Dtype>(split_param));
@@ -76,7 +76,7 @@ void LRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       // Set up a product_layer_ to compute outputs by multiplying inputs by the
      // inverse denominator computed by the power layer.
       product_bottom_vec_.clear();
-      product_bottom_vec_.push_back(bottom[0]);
+      product_bottom_vec_.push_back(&product_input_);
       product_bottom_vec_.push_back(&power_output_);
       LayerParameter product_param;
       EltwiseParameter* eltwise_param = product_param.mutable_eltwise_param();
src/caffe/layers/split_layer.cpp
index c223d47..3bed347 100644 (file)
@@ -11,12 +11,13 @@ void SplitLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top) {
   count_ = bottom[0]->count();
   for (int i = 0; i < top->size(); ++i) {
-    // Allow the 0th top blob to be 'in-place', but no others.
-    if (i == 0 && (*top)[i] == bottom[0]) {
-      continue;
-    } else {
-      CHECK_NE((*top)[i], bottom[0]) << "Only 0th top blob may be in place.";
-    }
+    // Do not allow in-place computation in the SplitLayer.  Instead, share data
+    // by reference in the forward pass, and keep separate diff allocations in
+    // the backward pass.  (Technically, it should be possible to share the diff
+    // blob of the first split output with the input, but this seems to cause
+    // some strange effects in practice...)
+    CHECK_NE((*top)[i], bottom[0]) << this->type_name() << " Layer does not "
+        "allow in-place computation.";
     (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
                        bottom[0]->height(), bottom[0]->width());
     CHECK_EQ(count_, (*top)[i]->count());
@@ -34,14 +35,18 @@ void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  if (propagate_down[0]) {
-    (*bottom)[0]->ShareDiff(*top[0]);
-    // Add remaining top blob diffs.
+  if (!propagate_down[0]) { return; }
+  if (top.size() == 1) {
+    caffe_copy(count_, top[0]->cpu_diff(), (*bottom)[0]->mutable_cpu_diff());
+    return;
+  }
+  caffe_add(count_, top[0]->cpu_diff(), top[1]->cpu_diff(),
+            (*bottom)[0]->mutable_cpu_diff());
+  // Add remaining top blob diffs.
+  for (int i = 2; i < top.size(); ++i) {
+    const Dtype* top_diff = top[i]->cpu_diff();
     Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
-    for (int i = 1; i < top.size(); ++i) {
-      const Dtype* top_diff = top[i]->cpu_diff();
-      caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
-    }
+    caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
   }
 }
 
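Since each SPLIT top is a copy of the bottom blob, the bottom gradient is the
elementwise sum of the top gradients. A minimal plain-C++ sketch of the
overwrite-then-accumulate pattern above (no Caffe dependencies; split_backward
is a hypothetical stand-in, and the two-top caffe_add special case is folded
into the loop here):

    #include <cstring>
    #include <vector>

    // bottom_diff = sum of all top diffs. The first top's diff is copied in
    // rather than shared, so later accumulation cannot alias it.
    void split_backward(const std::vector<const float*>& top_diffs,
                        float* bottom_diff, int count) {
      std::memcpy(bottom_diff, top_diffs[0], count * sizeof(float));  // caffe_copy
      for (std::size_t i = 1; i < top_diffs.size(); ++i) {
        for (int j = 0; j < count; ++j) {
          bottom_diff[j] += top_diffs[i][j];  // caffe_axpy with alpha = 1
        }
      }
    }
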
src/caffe/layers/split_layer.cu
index 2d2b3c2..fcc0917 100644 (file)
@@ -17,14 +17,18 @@ void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  if (propagate_down[0]) {
-    (*bottom)[0]->ShareDiff(*top[0]);
-    // Add remaining top blob diffs.
+  if (!propagate_down[0]) { return; }
+  if (top.size() == 1) {
+    caffe_copy(count_, top[0]->gpu_diff(), (*bottom)[0]->mutable_gpu_diff());
+    return;
+  }
+  caffe_gpu_add(count_, top[0]->gpu_diff(), top[1]->gpu_diff(),
+                (*bottom)[0]->mutable_gpu_diff());
+  // Add remaining top blob diffs.
+  for (int i = 2; i < top.size(); ++i) {
+    const Dtype* top_diff = top[i]->gpu_diff();
     Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
-    for (int i = 1; i < top.size(); ++i) {
-      const Dtype* top_diff = top[i]->gpu_diff();
-      caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
-    }
+    caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
   }
 }
 
src/caffe/test/test_split_layer.cpp
index 448e13c..92a3ec7 100644 (file)
@@ -76,19 +76,6 @@ TYPED_TEST(SplitLayerTest, Test) {
   }
 }
 
-TYPED_TEST(SplitLayerTest, TestInPlace) {
-  typedef typename TypeParam::Dtype Dtype;
-  LayerParameter layer_param;
-  SplitLayer<Dtype> layer(layer_param);
-  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
-  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
-  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
-  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
-    Dtype bottom_value = this->blob_bottom_->cpu_data()[i];
-    EXPECT_EQ(bottom_value, this->blob_top_b_->cpu_data()[i]);
-  }
-}
-
 TYPED_TEST(SplitLayerTest, TestGradient) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
@@ -98,16 +85,6 @@ TYPED_TEST(SplitLayerTest, TestGradient) {
       &(this->blob_top_vec_));
 }
 
-TYPED_TEST(SplitLayerTest, TestGradientInPlace) {
-  typedef typename TypeParam::Dtype Dtype;
-  LayerParameter layer_param;
-  SplitLayer<Dtype> layer(layer_param);
-  GradientChecker<Dtype> checker(1e-2, 1e-2);
-  this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
-  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
-      &(this->blob_top_vec_));
-}
-
 
 class SplitLayerInsertionTest : public ::testing::Test {
  protected:
@@ -124,12 +101,12 @@ class SplitLayerInsertionTest : public ::testing::Test {
         output_param_string, &expected_output_param));
     NetParameter actual_output_param;
     InsertSplits(input_param, &actual_output_param);
-    CHECK_EQ(expected_output_param.DebugString(),
+    EXPECT_EQ(expected_output_param.DebugString(),
         actual_output_param.DebugString());
     // Also test idempotence.
     NetParameter double_split_insert_param;
     InsertSplits(actual_output_param, &double_split_insert_param);
-    CHECK_EQ(actual_output_param.DebugString(),
+    EXPECT_EQ(actual_output_param.DebugString(),
        double_split_insert_param.DebugString());
   }
 };
@@ -624,7 +601,7 @@ TEST_F(SplitLayerInsertionTest, TestLossInsertion) {
       "  name: 'data_data_0_split' "
       "  type: SPLIT "
       "  bottom: 'data' "
-      "  top: 'data' "
+      "  top: 'data_data_0_split_0' "
       "  top: 'data_data_0_split_1' "
       "} "
       "layers: { "
@@ -639,14 +616,14 @@ TEST_F(SplitLayerInsertionTest, TestLossInsertion) {
       "    } "
       "  } "
       "  param: 'unsharedweights1' "
-      "  bottom: 'data' "
+      "  bottom: 'data_data_0_split_0' "
       "  top: 'innerproduct1' "
       "} "
       "layers: { "
       "  name: 'innerproduct1_innerproduct1_0_split' "
       "  type: SPLIT "
       "  bottom: 'innerproduct1' "
-      "  top: 'innerproduct1' "
+      "  top: 'innerproduct1_innerproduct1_0_split_0' "
       "  top: 'innerproduct1_innerproduct1_0_split_1' "
       "  loss_weight: 2.5 "
       "  loss_weight: 0 "
@@ -726,14 +703,14 @@ TEST_F(SplitLayerInsertionTest, TestInsertion) {
       "  name: 'data_data_0_split' "
       "  type: SPLIT "
       "  bottom: 'data' "
-      "  top: 'data' "
+      "  top: 'data_data_0_split_0' "
       "  top: 'data_data_0_split_1' "
       "  top: 'data_data_0_split_2' "
       "} "
       "layers: { "
       "  name: 'innerprod1' "
       "  type: INNER_PRODUCT "
-      "  bottom: 'data' "
+      "  bottom: 'data_data_0_split_0' "
       "  top: 'innerprod1' "
       "} "
       "layers: { "
@@ -746,7 +723,7 @@ TEST_F(SplitLayerInsertionTest, TestInsertion) {
       "  name: 'innerprod2_innerprod2_0_split' "
       "  type: SPLIT "
       "  bottom: 'innerprod2' "
-      "  top: 'innerprod2' "
+      "  top: 'innerprod2_innerprod2_0_split_0' "
       "  top: 'innerprod2_innerprod2_0_split_1' "
       "} "
       "layers: { "
@@ -759,7 +736,7 @@ TEST_F(SplitLayerInsertionTest, TestInsertion) {
       "  name: 'loss1' "
       "  type: EUCLIDEAN_LOSS "
       "  bottom: 'innerprod1' "
-      "  bottom: 'innerprod2' "
+      "  bottom: 'innerprod2_innerprod2_0_split_0' "
       "} "
       "layers: { "
       "  name: 'loss2' "
@@ -827,26 +804,26 @@ TEST_F(SplitLayerInsertionTest, TestInsertionTwoTop) {
       "  name: 'data_data_0_split' "
       "  type: SPLIT "
       "  bottom: 'data' "
-      "  top: 'data' "
+      "  top: 'data_data_0_split_0' "
       "  top: 'data_data_0_split_1' "
       "} "
       "layers: { "
       "  name: 'label_data_1_split' "
       "  type: SPLIT "
       "  bottom: 'label' "
-      "  top: 'label' "
+      "  top: 'label_data_1_split_0' "
       "  top: 'label_data_1_split_1' "
       "} "
       "layers: { "
       "  name: 'innerprod1' "
       "  type: INNER_PRODUCT "
-      "  bottom: 'data' "
+      "  bottom: 'data_data_0_split_0' "
       "  top: 'innerprod1' "
       "} "
       "layers: { "
       "  name: 'innerprod2' "
       "  type: INNER_PRODUCT "
-      "  bottom: 'label' "
+      "  bottom: 'label_data_1_split_0' "
       "  top: 'innerprod2' "
       "} "
       "layers: { "
@@ -913,13 +890,13 @@ TEST_F(SplitLayerInsertionTest, TestInputInsertion) {
       "  name: 'data_input_0_split' "
       "  type: SPLIT "
       "  bottom: 'data' "
-      "  top: 'data' "
+      "  top: 'data_input_0_split_0' "
       "  top: 'data_input_0_split_1' "
       "} "
       "layers: { "
       "  name: 'innerprod1' "
       "  type: INNER_PRODUCT "
-      "  bottom: 'data' "
+      "  bottom: 'data_input_0_split_0' "
       "  top: 'innerprod1' "
       "} "
       "layers: { "
@@ -988,13 +965,13 @@ TEST_F(SplitLayerInsertionTest, TestWithInPlace) {
       "  name: 'data_data_0_split' "
       "  type: SPLIT "
       "  bottom: 'data' "
-      "  top: 'data' "
+      "  top: 'data_data_0_split_0' "
       "  top: 'data_data_0_split_1' "
       "} "
       "layers: { "
       "  name: 'innerprod1' "
       "  type: INNER_PRODUCT "
-      "  bottom: 'data' "
+      "  bottom: 'data_data_0_split_0' "
       "  top: 'innerprod1' "
       "} "
       "layers: { "
@@ -1007,13 +984,13 @@ TEST_F(SplitLayerInsertionTest, TestWithInPlace) {
       "  name: 'innerprod1_relu1_0_split' "
       "  type: SPLIT "
       "  bottom: 'innerprod1' "
-      "  top: 'innerprod1' "
+      "  top: 'innerprod1_relu1_0_split_0' "
       "  top: 'innerprod1_relu1_0_split_1' "
       "} "
       "layers: { "
       "  name: 'innerprod2' "
       "  type: INNER_PRODUCT "
-      "  bottom: 'innerprod1' "
+      "  bottom: 'innerprod1_relu1_0_split_0' "
       "  top: 'innerprod2' "
       "} "
       "layers: { "
src/caffe/util/insert_splits.cpp
index e917e35..f20efda 100644 (file)
@@ -135,11 +135,6 @@ string SplitLayerName(const string& layer_name, const string& blob_name,
 
 string SplitBlobName(const string& layer_name, const string& blob_name,
     const int blob_idx, const int split_idx) {
-  // 0th split top blob is given the same name as the bottom blob so that
-  // computation is done 'in-place', saving a bit of time and memory.
-  if (split_idx == 0) {
-    return blob_name;
-  }
   ostringstream split_blob_name;
   split_blob_name << blob_name << "_" << layer_name << "_" << blob_idx
       << "_split_" << split_idx;