Make force_backward respect inputs that cannot be backpropagated to (e.g. labels)

author Jeff Donahue <jeff.donahue@gmail.com>

Mon, 16 Jun 2014 23:37:17 +0000 (16:37 -0700)

committer Evan Shelhamer <shelhamer@imaginarynumber.net>

Thu, 26 Jun 2014 19:07:39 +0000 (12:07 -0700)
author Jeff Donahue <jeff.donahue@gmail.com>
Mon, 16 Jun 2014 23:37:17 +0000 (16:37 -0700)
committer Evan Shelhamer <shelhamer@imaginarynumber.net>
Thu, 26 Jun 2014 19:07:39 +0000 (12:07 -0700)
diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp

index 12e7610..690c36b 100644 (file)
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -84,6 +84,14 @@ class Layer {
    virtual inline int MinTopBlobs() const { return -1; }
    virtual inline int MaxTopBlobs() const { return -1; }
  
+  // Returns whether force_backward may be applied to the given bottom blob.
+  // If AllowForceBackward(i) == false, we will ignore the force_backward
+  // setting and backpropagate to blob i only if it needs gradient information
+  // (as is done when force_backward == false).
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return true;
+  }
+
   protected:
    // The protobuf that stores the layer parameters
    LayerParameter layer_param_;
diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp

index b8adc99..bb03f63 100644 (file)
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -38,6 +38,11 @@ class LossLayer : public Layer<Dtype> {
  
    virtual inline int ExactNumBottomBlobs() const { return 2; }
    virtual inline int MaxTopBlobs() const { return 1; }
+  // Bottom 1 carries the labels, which we usually cannot backpropagate to;
+  // ignore force_backward for that input.
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return bottom_index != 1;
+  }
  };
  
  // Forward declare SoftmaxLayer for use in SoftmaxWithLossLayer.
@@ -63,6 +68,11 @@ class SoftmaxWithLossLayer : public Layer<Dtype> {
      return LayerParameter_LayerType_SOFTMAX_LOSS;
    }
    virtual inline int MaxTopBlobs() const { return 2; }
+  // Bottom 1 carries the labels, which we cannot backpropagate to; ignore
+  // force_backward for that input.
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return bottom_index != 1;
+  }
  
   protected:
    virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -133,6 +143,11 @@ class EuclideanLossLayer : public LossLayer<Dtype> {
    virtual inline LayerParameter_LayerType type() const {
      return LayerParameter_LayerType_EUCLIDEAN_LOSS;
    }
+  // Unlike most loss layers, the EuclideanLossLayer can backpropagate to both
+  // inputs, so override the LossLayer default to allow force_backward for both.
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return true;
+  }
  
   protected:
    virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp

index 8687d00..a653761 100644 (file)
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -62,7 +62,7 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
      layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
      layer_names_.push_back(layer_param.name());
      LOG(INFO) << "Creating Layer " << layer_param.name();
-    bool need_backward = param.force_backward();
+    bool need_backward = false;
      // Figure out this layer's input and output
      for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
           ++bottom_id) {
@@ -112,6 +112,21 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
                  << " does not need backward computation.";
      }
    }
+  // If force_backward is set, mark all layers and allowed bottoms for backward.
+  if (param.force_backward()) {
+    for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
+      layer_need_backward_[layer_id] = true;
+      for (int bottom_id = 0;
+           bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) {
+        bottom_need_backward_[layer_id][bottom_id] =
+            bottom_need_backward_[layer_id][bottom_id] ||
+            layers_[layer_id]->AllowForceBackward(bottom_id);
+        blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] =
+            blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] ||
+            bottom_need_backward_[layer_id][bottom_id];
+      }
+    }
+  }
    // In the end, all remaining blobs are considered output blobs.
    for (set<string>::iterator it = available_blobs.begin();
        it != available_blobs.end(); ++it) {
@@ -162,7 +177,7 @@ void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
      const int blob_id = blobs_.size();
      blobs_.push_back(blob_pointer);
      blob_names_.push_back(blob_name);
-    blob_need_backward_.push_back(param.force_backward());
+    blob_need_backward_.push_back(false);
      (*blob_name_to_idx)[blob_name] = blob_id;
      if (layer_id == -1) {
        // Set the (explicitly specified) dimensions of the input blob.
@@ -197,7 +212,7 @@ int Net<Dtype>::AppendBottom(const NetParameter& param,
    bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
    bottom_id_vecs_[layer_id].push_back(blob_id);
    available_blobs->erase(blob_name);
-  const bool need_backward = param.force_backward() || blob_need_backward_[blob_id];
+  const bool need_backward = blob_need_backward_[blob_id];
    bottom_need_backward_[layer_id].push_back(need_backward);
    return blob_id;
  }
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp

index eb368cd..8fb02fc 100644 (file)
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -80,6 +80,62 @@ class NetTest : public ::testing::Test {
      InitNetFromProtoString(proto);
    }
  
+  virtual void InitTinyNetEuclidean(const bool force_backward = false) {
+    string proto =  // data -> inner product -> Euclidean loss test net
+        "name: 'TinyTestEuclidLossNetwork' "
+        "layers: { "
+        "  name: 'data' "
+        "  type: DUMMY_DATA "
+        "  dummy_data_param { "  // two shapes; presumably one per top
+        "    num: 5 "
+        "    channels: 2 "
+        "    height: 3 "
+        "    width: 4 "
+        "    num: 5 "
+        "    channels: 1 "
+        "    height: 1 "
+        "    width: 1 "
+        "    data_filler { "
+        "      type: 'gaussian' "
+        "      std: 0.01 "
+        "    } "
+        "  } "
+        "  top: 'data' "
+        "  top: 'label' "
+        "} "
+        "layers: { "
+        "  name: 'innerproduct' "
+        "  type: INNER_PRODUCT "
+        "  inner_product_param { "
+        "    num_output: 1 "
+        "    weight_filler { "
+        "      type: 'gaussian' "
+        "      std: 0.01 "
+        "    } "
+        "    bias_filler { "
+        "      type: 'constant' "
+        "      value: 0 "
+        "    } "
+        "  } "
+        "  blobs_lr: 1. "
+        "  blobs_lr: 2. "
+        "  weight_decay: 1. "
+        "  weight_decay: 0. "
+        "  bottom: 'data' "
+        "  top: 'innerproduct' "
+        "} "
+        "layers: { "
+        "  name: 'loss' "
+        "  type: EUCLIDEAN_LOSS "
+        "  bottom: 'innerproduct' "  // loss bottom index 0
+        "  bottom: 'label' "  // loss bottom index 1
+        "} ";
+    if (force_backward) {  // append the flag to the net definition string
+      proto += "force_backward: true ";
+    }
+    InitNetFromProtoString(proto);
+  }
+
    virtual void InitTrickyNet() {
      const string& proto =
          "name: 'TrickyTestNetwork' "
@@ -218,6 +274,20 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardForce) {
    EXPECT_EQ(true, bottom_need_backward[1][0]);
    EXPECT_EQ(2, bottom_need_backward[2].size());
    EXPECT_EQ(true, bottom_need_backward[2][0]);
+  EXPECT_EQ(false, bottom_need_backward[2][1]);
+}
+
+TYPED_TEST(NetTest, TestBottomNeedBackwardEuclideanForce) {
+  const bool force_backward = true;
+  this->InitTinyNetEuclidean(force_backward);
+  const vector<vector<bool> >& bottom_need_backward =
+      this->net_->bottom_need_backward();
+  EXPECT_EQ(3, bottom_need_backward.size());
+  EXPECT_EQ(0, bottom_need_backward[0].size());
+  EXPECT_EQ(1, bottom_need_backward[1].size());
+  EXPECT_EQ(true, bottom_need_backward[1][0]);
+  EXPECT_EQ(2, bottom_need_backward[2].size());
+  EXPECT_EQ(true, bottom_need_backward[2][0]);
    EXPECT_EQ(true, bottom_need_backward[2][1]);
  }
  
@@ -233,6 +303,9 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardTricky) {
    EXPECT_EQ(false, bottom_need_backward[2][0]);
    EXPECT_EQ(2, bottom_need_backward[3].size());
    EXPECT_EQ(true, bottom_need_backward[3][0]);
+  // The label input to the SoftmaxLossLayer should say it "needs backward"
+  // since it has weights under it, even though we expect this to cause a crash
+  // at training/test time.
    EXPECT_EQ(true, bottom_need_backward[3][1]);
  }
author	Jeff Donahue <jeff.donahue@gmail.com>
	Mon, 16 Jun 2014 23:37:17 +0000 (16:37 -0700)
committer	Evan Shelhamer <shelhamer@imaginarynumber.net>
	Thu, 26 Jun 2014 19:07:39 +0000 (12:07 -0700)
include/caffe/layer.hpp		patch \| blob \| history
include/caffe/loss_layers.hpp		patch \| blob \| history
src/caffe/net.cpp		patch \| blob \| history
src/caffe/test/test_net.cpp		patch \| blob \| history