From a04834f6ceee13310b7f4e17ccf639c1e7abd459 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Mon, 16 Jun 2014 16:37:17 -0700
Subject: [PATCH] force_backward works properly with non-backproppable things

---
 include/caffe/layer.hpp       |  8 +++++
 include/caffe/loss_layers.hpp | 15 +++++++++
 src/caffe/net.cpp             | 21 +++++++++++--
 src/caffe/test/test_net.cpp   | 73 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp
index 12e7610..690c36b 100644
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -84,6 +84,14 @@ class Layer {
   virtual inline int MinTopBlobs() const { return -1; }
   virtual inline int MaxTopBlobs() const { return -1; }
 
+  // Declare for each bottom blob whether to allow force_backward -- that is,
+  // if AllowForceBackward(i) == false, we will ignore the force_backward
+  // setting and backpropagate to blob i only if it needs gradient information
+  // (as is done when force_backward == false).
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return true;
+  }
+
  protected:
   // The protobuf that stores the layer parameters
   LayerParameter layer_param_;
diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index b8adc99..bb03f63 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -38,6 +38,11 @@ class LossLayer : public Layer<Dtype> {
 
   virtual inline int ExactNumBottomBlobs() const { return 2; }
   virtual inline int MaxTopBlobs() const { return 1; }
+  // We usually cannot backpropagate to the labels; ignore force_backward for
+  // these inputs.
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return bottom_index != 1;
+  }
 };
 
 // Forward declare SoftmaxLayer for use in SoftmaxWithLossLayer.
@@ -63,6 +68,11 @@
     return LayerParameter_LayerType_SOFTMAX_LOSS;
   }
   virtual inline int MaxTopBlobs() const { return 2; }
+  // We cannot backpropagate to the labels; ignore force_backward for these
+  // inputs.
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return bottom_index != 1;
+  }
 
  protected:
   virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -133,6 +143,11 @@ class EuclideanLossLayer : public LossLayer<Dtype> {
   virtual inline LayerParameter_LayerType type() const {
     return LayerParameter_LayerType_EUCLIDEAN_LOSS;
   }
+  // Unlike most loss layers, in the EuclideanLossLayer we can backpropagate
+  // to both inputs.
+  virtual inline bool AllowForceBackward(const int bottom_index) const {
+    return true;
+  }
 
  protected:
   virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 8687d00..a653761 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -62,7 +62,7 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
     layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
     layer_names_.push_back(layer_param.name());
     LOG(INFO) << "Creating Layer " << layer_param.name();
-    bool need_backward = param.force_backward();
+    bool need_backward = false;
     // Figure out this layer's input and output
     for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
          ++bottom_id) {
@@ -112,6 +112,21 @@
           << " does not need backward computation.";
     }
   }
+  // Handle force_backward if needed.
+  if (param.force_backward()) {
+    for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
+      layer_need_backward_[layer_id] = true;
+      for (int bottom_id = 0;
+           bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) {
+        bottom_need_backward_[layer_id][bottom_id] =
+            bottom_need_backward_[layer_id][bottom_id] ||
+            layers_[layer_id]->AllowForceBackward(bottom_id);
+        blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] =
+            blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] ||
+            bottom_need_backward_[layer_id][bottom_id];
+      }
+    }
+  }
   // In the end, all remaining blobs are considered output blobs.
   for (set<string>::iterator it = available_blobs.begin();
       it != available_blobs.end(); ++it) {
@@ -162,7 +177,7 @@ void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
     const int blob_id = blobs_.size();
     blobs_.push_back(blob_pointer);
     blob_names_.push_back(blob_name);
-    blob_need_backward_.push_back(param.force_backward());
+    blob_need_backward_.push_back(false);
     (*blob_name_to_idx)[blob_name] = blob_id;
     if (layer_id == -1) {
       // Set the (explicitly specified) dimensions of the input blob.
@@ -197,7 +212,7 @@ int Net<Dtype>::AppendBottom(const NetParameter& param,
   bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
   bottom_id_vecs_[layer_id].push_back(blob_id);
   available_blobs->erase(blob_name);
-  const bool need_backward = param.force_backward() || blob_need_backward_[blob_id];
+  const bool need_backward = blob_need_backward_[blob_id];
   bottom_need_backward_[layer_id].push_back(need_backward);
   return blob_id;
 }
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index eb368cd..8fb02fc 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -80,6 +80,62 @@ class NetTest : public ::testing::Test {
     InitNetFromProtoString(proto);
   }
 
+  virtual void InitTinyNetEuclidean(const bool force_backward = false) {
+    string proto =
+        "name: 'TinyTestEuclidLossNetwork' "
+        "layers: { "
+        "  name: 'data' "
+        "  type: DUMMY_DATA "
+        "  dummy_data_param { "
+        "    num: 5 "
+        "    channels: 2 "
+        "    height: 3 "
+        "    width: 4 "
+        "    num: 5 "
+        "    channels: 1 "
+        "    height: 1 "
+        "    width: 1 "
+        "    data_filler { "
+        "      type: 'gaussian' "
+        "      std: 0.01 "
+        "    } "
+        "  } "
+        "  top: 'data' "
+        "  top: 'label' "
+        "} "
+        "layers: { "
+        "  name: 'innerproduct' "
+        "  type: INNER_PRODUCT "
+        "  inner_product_param { "
+        "    num_output: 1 "
+        "    weight_filler { "
+        "      type: 'gaussian' "
+        "      std: 0.01 "
+        "    } "
+        "    bias_filler { "
+        "      type: 'constant' "
+        "      value: 0 "
+        "    } "
+        "  } "
+        "  blobs_lr: 1. "
+        "  blobs_lr: 2. "
+        "  weight_decay: 1. "
+        "  weight_decay: 0. "
+        "  bottom: 'data' "
+        "  top: 'innerproduct' "
+        "} "
+        "layers: { "
+        "  name: 'loss' "
+        "  type: EUCLIDEAN_LOSS "
+        "  bottom: 'innerproduct' "
+        "  bottom: 'label' "
+        "} ";
+    if (force_backward) {
+      proto += "force_backward: true ";
+    }
+    InitNetFromProtoString(proto);
+  }
+
   virtual void InitTrickyNet() {
     const string& proto =
         "name: 'TrickyTestNetwork' "
@@ -218,6 +274,20 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardForce) {
   EXPECT_EQ(true, bottom_need_backward[1][0]);
   EXPECT_EQ(2, bottom_need_backward[2].size());
   EXPECT_EQ(true, bottom_need_backward[2][0]);
+  EXPECT_EQ(false, bottom_need_backward[2][1]);
+}
+
+TYPED_TEST(NetTest, TestBottomNeedBackwardEuclideanForce) {
+  const bool force_backward = true;
+  this->InitTinyNetEuclidean(force_backward);
+  const vector<vector<bool> >& bottom_need_backward =
+      this->net_->bottom_need_backward();
+  EXPECT_EQ(3, bottom_need_backward.size());
+  EXPECT_EQ(0, bottom_need_backward[0].size());
+  EXPECT_EQ(1, bottom_need_backward[1].size());
+  EXPECT_EQ(true, bottom_need_backward[1][0]);
+  EXPECT_EQ(2, bottom_need_backward[2].size());
+  EXPECT_EQ(true, bottom_need_backward[2][0]);
   EXPECT_EQ(true, bottom_need_backward[2][1]);
 }
 
@@ -233,6 +303,9 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardTricky) {
   EXPECT_EQ(false, bottom_need_backward[2][0]);
   EXPECT_EQ(2, bottom_need_backward[3].size());
   EXPECT_EQ(true, bottom_need_backward[3][0]);
+  // The label input to the SoftmaxLossLayer should say it "needs backward"
+  // since it has weights under it, even though we expect this to cause a crash
+  // at training/test time.
   EXPECT_EQ(true, bottom_need_backward[3][1]);
 }
 
-- 
2.7.4