Make multiple losses work by inserting split layers and add some tests for it.
author    Jeff Donahue <jeff.donahue@gmail.com>
          Fri, 11 Jul 2014 10:17:53 +0000 (03:17 -0700)
committer Jeff Donahue <jeff.donahue@gmail.com>
          Wed, 13 Aug 2014 20:22:08 +0000 (13:22 -0700)
Test that we can call Backward with an ACCURACY layer. This previously
failed, but should now be possible since we explicitly associate a loss
weight with each top blob.
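
The headline change can be exercised directly through InsertSplits (declared in
include/caffe/util/insert_splits.hpp, diffed below). The following is a minimal
sketch, assuming a Caffe checkout at this revision with protobuf available; the
net and layer names are illustrative rather than taken from the tests:

    #include <iostream>
    #include <string>

    #include "google/protobuf/text_format.h"

    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/insert_splits.hpp"

    int main() {
      // 'ip' is both a loss (loss_weight: 2.5) and a bottom of another layer,
      // which is the multiple-loss case this change makes work by inserting a
      // SPLIT layer for it.
      const std::string proto =
          "name: 'demo' "
          "layers: { name: 'data' type: DUMMY_DATA top: 'data' "
          "  dummy_data_param { num: 1 channels: 1 height: 1 width: 1 } } "
          "layers: { name: 'ip' type: INNER_PRODUCT bottom: 'data' top: 'ip' "
          "  inner_product_param { num_output: 1 } loss_weight: 2.5 } "
          "layers: { name: 'loss' type: EUCLIDEAN_LOSS "
          "  bottom: 'ip' bottom: 'data' top: 'loss' } ";
      caffe::NetParameter in_param, out_param;
      if (!google::protobuf::TextFormat::ParseFromString(proto, &in_param)) {
        std::cerr << "Failed to parse NetParameter" << std::endl;
        return 1;
      }
      caffe::InsertSplits(in_param, &out_param);
      // The printed net should contain a SPLIT layer after 'ip' that carries
      // loss_weight 2.5 on its first top and 0 on the remaining tops.
      std::cout << out_param.DebugString() << std::endl;
      return 0;
    }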

include/caffe/util/insert_splits.hpp
src/caffe/test/test_net.cpp
src/caffe/test/test_split_layer.cpp
src/caffe/util/insert_splits.cpp

diff --git a/include/caffe/util/insert_splits.hpp b/include/caffe/util/insert_splits.hpp
index 4ca933b..446abb8 100644
@@ -12,7 +12,7 @@ namespace caffe {
 void InsertSplits(const NetParameter& param, NetParameter* param_split);
 
 void ConfigureSplitLayer(const string& layer_name, const string& blob_name,
-    const int blob_idx, const int split_count,
+    const int blob_idx, const int split_count, const float loss_weight,
     LayerParameter* split_layer_param);
 
 string SplitLayerName(const string& layer_name, const string& blob_name,
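
For reference, a minimal sketch of calling the widened ConfigureSplitLayer
signature directly; this mirrors what InsertSplits does internally when a top
blob with a nonzero loss_weight also feeds other layers. The argument values
below are illustrative rather than taken from the tests:

    #include <iostream>

    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/insert_splits.hpp"

    int main() {
      caffe::LayerParameter split_param;
      // Layer 'innerproduct1' produced top blob 'innerproduct1' (top index 0),
      // which has two consumers once its loss_weight of 2.5 is counted as one.
      caffe::ConfigureSplitLayer("innerproduct1", "innerproduct1",
                                 /* blob_idx */ 0, /* split_count */ 2,
                                 /* loss_weight */ 2.5f, &split_param);
      // Expect a SPLIT layer whose first top carries loss_weight 2.5 and whose
      // second top carries loss_weight 0.
      std::cout << split_param.DebugString() << std::endl;
      return 0;
    }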
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 497f11d..fa5735f 100644
@@ -28,7 +28,34 @@ class NetTest : public MultiDeviceTest<TypeParam> {
     net_.reset(new Net<Dtype>(param));
   }
 
-  virtual void InitTinyNet(const bool force_backward = false) {
+  virtual void CopyNetBlobs(const bool copy_diff,
+      vector<shared_ptr<Blob<Dtype> > >* blobs_copy) {
+    CHECK(net_);
+    const vector<shared_ptr<Blob<Dtype> > >& net_blobs = net_->blobs();
+    blobs_copy->clear();
+    blobs_copy->resize(net_blobs.size());
+    const bool kReshape = true;
+    for (int i = 0; i < net_blobs.size(); ++i) {
+      (*blobs_copy)[i].reset(new Blob<Dtype>());
+      (*blobs_copy)[i]->CopyFrom(*net_blobs[i], copy_diff, kReshape);
+    }
+  }
+
+  virtual void CopyNetParams(const bool copy_diff,
+      vector<shared_ptr<Blob<Dtype> > >* params_copy) {
+    CHECK(net_);
+    const vector<shared_ptr<Blob<Dtype> > >& net_params = net_->params();
+    params_copy->clear();
+    params_copy->resize(net_params.size());
+    const bool kReshape = true;
+    for (int i = 0; i < net_params.size(); ++i) {
+      (*params_copy)[i].reset(new Blob<Dtype>());
+      (*params_copy)[i]->CopyFrom(*net_params[i], copy_diff, kReshape);
+    }
+  }
+
+  virtual void InitTinyNet(const bool force_backward = false,
+                           const bool accuracy_layer = false) {
     string proto =
         "name: 'TinyTestNetwork' "
         "layers: { "
@@ -79,6 +106,16 @@ class NetTest : public MultiDeviceTest<TypeParam> {
         "  bottom: 'label' "
         "  top: 'top_loss' "
         "} ";
+    if (accuracy_layer) {
+      proto +=
+          "layers: { "
+          "  name: 'loss' "
+          "  type: ACCURACY "
+          "  bottom: 'innerproduct' "
+          "  bottom: 'label' "
+          "  top: 'accuracy' "
+          "} ";
+    }
     if (force_backward) {
       proto += "force_backward: true ";
     }
@@ -606,20 +643,10 @@ TYPED_TEST(NetTest, TestLossWeight) {
   this->InitUnsharedWeightsNet(NULL, NULL, kForceBackward);
   const Dtype loss = this->net_->ForwardBackward(bottom);
   const bool kCopyDiff = true;
-  const bool kReshape = true;
-  const vector<shared_ptr<Blob<Dtype> > >& net_blobs = this->net_->blobs();
-  vector<shared_ptr<Blob<Dtype> > > blob_grads(net_blobs.size());
-  for (int i = 0; i < net_blobs.size(); ++i) {
-    blob_grads[i].reset(new Blob<Dtype>());
-    blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
-  }
-  const vector<shared_ptr<Blob<Dtype> > >& net_params =
-      this->net_->params();
-  vector<shared_ptr<Blob<Dtype> > > param_grads(net_params.size());
-  for (int i = 0; i < net_params.size(); ++i) {
-    param_grads[i].reset(new Blob<Dtype>());
-    param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
-  }
+  vector<shared_ptr<Blob<Dtype> > > blob_grads;
+  this->CopyNetBlobs(kCopyDiff, &blob_grads);
+  vector<shared_ptr<Blob<Dtype> > > param_grads;
+  this->CopyNetParams(kCopyDiff, &param_grads);
   // Check that the loss is non-trivial, otherwise the test doesn't prove much.
   const Dtype kMinLossAbsValue = 1e-2;
   ASSERT_GE(fabs(loss), kMinLossAbsValue);
@@ -656,6 +683,185 @@ TYPED_TEST(NetTest, TestLossWeight) {
   }
 }
 
+TYPED_TEST(NetTest, TestLossWeightMidNet) {
+  typedef typename TypeParam::Dtype Dtype;
+  vector<Blob<Dtype>*> bottom;
+  Caffe::set_random_seed(this->seed_);
+  const bool kForceBackward = true;
+  Dtype loss_weight = 0;
+  Dtype midnet_loss_weight = 1;
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+                               kForceBackward);
+  const Dtype loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  const bool kReshape = true;
+  Blob<Dtype> data_grad;
+  data_grad.CopyFrom(*this->net_->blob_by_name("data"), kCopyDiff, kReshape);
+  // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+  const Dtype kMinLossAbsValue = 1e-2;
+  ASSERT_GE(fabs(loss), kMinLossAbsValue);
+  const Dtype kErrorMargin = 1e-4;
+  const int kNumLossWeights = 6;
+  Dtype kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+  for (int i = 0; i < kNumLossWeights; ++i) {
+    Caffe::set_random_seed(this->seed_);
+    this->InitUnsharedWeightsNet(&loss_weight, &kLossWeights[i],
+                                 kForceBackward);
+    const Dtype weighted_loss = this->net_->ForwardBackward(bottom);
+    const Dtype error_margin = kErrorMargin * fabs(kLossWeights[i]);
+    EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+        << "loss weight = " << kLossWeights[i];
+    const shared_ptr<Blob<Dtype> >& weighted_blob =
+        this->net_->blob_by_name("data");
+    ASSERT_EQ(data_grad.count(), weighted_blob->count());
+    for (int j = 0; j < data_grad.count(); ++j) {
+      EXPECT_NEAR(data_grad.cpu_diff()[j] * kLossWeights[i],
+                  weighted_blob->cpu_diff()[j], error_margin);
+    }
+  }
+}
+
+TYPED_TEST(NetTest, TestComboLossWeight) {
+  typedef typename TypeParam::Dtype Dtype;
+  vector<Blob<Dtype>*> bottom;
+  Dtype loss_weight;
+  Dtype midnet_loss_weight;
+  const bool kForceBackward = true;
+  const Dtype kErrorMargin = 1e-4;
+
+  // Get the loss and gradients with EUCLIDEAN_LOSS weight 1,
+  // INNER_PRODUCT weight 1.
+  loss_weight = 1;
+  midnet_loss_weight = 1;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+                               kForceBackward);
+  const Dtype loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  vector<shared_ptr<Blob<Dtype> > > blob_grads;
+  this->CopyNetBlobs(kCopyDiff, &blob_grads);
+  vector<shared_ptr<Blob<Dtype> > > param_grads;
+  this->CopyNetParams(kCopyDiff, &param_grads);
+
+  loss_weight = 2;
+  midnet_loss_weight = 1;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+                               kForceBackward);
+  const Dtype loss_main_2 = this->net_->ForwardBackward(bottom);
+  vector<shared_ptr<Blob<Dtype> > > blob_grads_loss_2;
+  this->CopyNetBlobs(kCopyDiff, &blob_grads_loss_2);
+  vector<shared_ptr<Blob<Dtype> > > param_grads_loss_2;
+  this->CopyNetParams(kCopyDiff, &param_grads_loss_2);
+
+  loss_weight = 3;
+  midnet_loss_weight = 1;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+                               kForceBackward);
+  const Dtype loss_main_3 = this->net_->ForwardBackward(bottom);
+  const vector<shared_ptr<Blob<Dtype> > >& blob_grads_loss_3 =
+      this->net_->blobs();
+  ASSERT_EQ(blob_grads.size(), blob_grads_loss_3.size());
+  ASSERT_EQ(blob_grads_loss_2.size(), blob_grads_loss_3.size());
+  for (int j = 0; j < blob_grads.size(); ++j) {
+    const string& blob_name = this->net_->blob_names()[j];
+    bool grad_should_change = true;
+    if (blob_name == "innerproduct1_innerproduct1_0_split_0") {
+      grad_should_change = false;
+    }
+    ASSERT_EQ(blob_grads[j]->count(), blob_grads_loss_3[j]->count());
+    ASSERT_EQ(blob_grads_loss_2[j]->count(), blob_grads_loss_3[j]->count());
+    for (int k = 0; k < blob_grads[j]->count(); ++k) {
+      const Dtype grad_diff_2 = blob_grads_loss_2[j]->cpu_diff()[k] -
+                                    blob_grads[j]->cpu_diff()[k];
+      const Dtype grad_diff_3 = blob_grads_loss_3[j]->cpu_diff()[k] -
+                                    blob_grads[j]->cpu_diff()[k];
+      if (grad_should_change) {
+        // Test non-triviality.
+        const Dtype kMinGradDiffAbsValue = 1e-4;
+        EXPECT_GT(fabs(grad_diff_2), kMinGradDiffAbsValue) << blob_name;
+        EXPECT_NEAR(2 * grad_diff_2, grad_diff_3, kErrorMargin) << blob_name;
+      } else {
+        EXPECT_EQ(0, grad_diff_2) << blob_name;
+        EXPECT_EQ(0, grad_diff_3) << blob_name;
+      }
+    }
+  }
+
+  loss_weight = 1;
+  midnet_loss_weight = 2;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+                               kForceBackward);
+  const Dtype loss_midnet_2 = this->net_->ForwardBackward(bottom);
+  this->CopyNetBlobs(kCopyDiff, &blob_grads_loss_2);
+  this->CopyNetParams(kCopyDiff, &param_grads_loss_2);
+
+  loss_weight = 1;
+  midnet_loss_weight = 3;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+                               kForceBackward);
+  const Dtype loss_midnet_3 = this->net_->ForwardBackward(bottom);
+  const vector<shared_ptr<Blob<Dtype> > >& blob_grads_midnet_loss_3 =
+      this->net_->blobs();
+  ASSERT_EQ(blob_grads.size(), blob_grads_midnet_loss_3.size());
+  ASSERT_EQ(blob_grads_loss_2.size(), blob_grads_midnet_loss_3.size());
+  const vector<string>& blob_names = this->net_->blob_names();
+  for (int j = 0; j < blob_grads.size(); ++j) {
+    const string& blob_name = blob_names[j];
+    bool grad_should_change = false;
+    if (blob_name == "innerproduct1" ||
+        blob_name == "innerproduct1_innerproduct1_0_split_0" ||
+        blob_name == "data_data_0_split_0" || blob_name == "data") {
+      grad_should_change = true;
+    }
+    ASSERT_EQ(blob_grads[j]->count(), blob_grads_midnet_loss_3[j]->count());
+    ASSERT_EQ(blob_grads[j]->count(), blob_grads_loss_2[j]->count());
+    for (int k = 0; k < blob_grads[j]->count(); ++k) {
+      const Dtype grad_diff_2 = blob_grads_loss_2[j]->cpu_diff()[k] -
+                                    blob_grads[j]->cpu_diff()[k];
+      const Dtype grad_diff_3 = blob_grads_midnet_loss_3[j]->cpu_diff()[k] -
+                                    blob_grads[j]->cpu_diff()[k];
+      if (grad_should_change) {
+        // Test non-triviality.
+        const Dtype kMinGradDiffAbsValue = 1e-4;
+        EXPECT_GT(fabs(grad_diff_2), kMinGradDiffAbsValue) << blob_name;
+        EXPECT_NEAR(2 * grad_diff_2, grad_diff_3, kErrorMargin) << blob_name;
+      } else {
+        EXPECT_EQ(0, grad_diff_2) << blob_name;
+        EXPECT_EQ(0, grad_diff_3) << blob_name;
+      }
+    }
+  }
+
+  const Dtype kMinLossDiffAbsValue = 1e-4;
+
+  Dtype loss_diff_2 = loss_main_2 - loss;
+  // Test non-triviality.
+  EXPECT_GT(fabs(loss_diff_2), kMinLossDiffAbsValue);
+  Dtype loss_diff_3 = loss_main_3 - loss;
+  EXPECT_NEAR(2 * loss_diff_2, loss_diff_3, kErrorMargin);
+
+  loss_diff_2 = loss_midnet_2 - loss;
+  // Test non-triviality.
+  EXPECT_GT(fabs(loss_diff_2), kMinLossDiffAbsValue);
+  loss_diff_3 = loss_midnet_3 - loss;
+  EXPECT_NEAR(2 * loss_diff_2, loss_diff_3, kErrorMargin);
+}
+
+TYPED_TEST(NetTest, TestBackwardWithAccuracyLayer) {
+  typedef typename TypeParam::Dtype Dtype;
+  const bool kForceBackward = false;
+  const bool kAccuracyLayer = true;
+  this->InitTinyNet(kForceBackward, kAccuracyLayer);
+  EXPECT_TRUE(this->net_->has_blob("accuracy"));
+  vector<Blob<Dtype>*> bottom;
+  // Test that we can do Backward even though we have an ACCURACY layer.
+  this->net_->ForwardBackward(bottom);
+}
+
 TYPED_TEST(NetTest, TestUnsharedWeightsDataNet) {
   typedef typename TypeParam::Dtype Dtype;
   this->InitUnsharedWeightsNet();
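
The two new fixture helpers, CopyNetBlobs and CopyNetParams, wrap a snapshot
pattern that is also useful outside these tests: copy every blob's diff into
freshly allocated blobs with Blob::CopyFrom, so a later ForwardBackward cannot
overwrite them. A minimal free-function sketch of the same idea, assuming a
constructed caffe::Net<float>; the function name is illustrative:

    #include <vector>

    #include <boost/shared_ptr.hpp>

    #include "caffe/blob.hpp"
    #include "caffe/net.hpp"

    // Snapshot the diffs of every blob in 'net' into newly allocated blobs.
    std::vector<boost::shared_ptr<caffe::Blob<float> > >
    SnapshotBlobDiffs(caffe::Net<float>* net) {
      const bool kCopyDiff = true;
      const bool kReshape = true;
      const std::vector<boost::shared_ptr<caffe::Blob<float> > >& net_blobs =
          net->blobs();
      std::vector<boost::shared_ptr<caffe::Blob<float> > > copies(net_blobs.size());
      for (int i = 0; i < net_blobs.size(); ++i) {
        copies[i].reset(new caffe::Blob<float>());
        copies[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
      }
      return copies;
    }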
diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp
index bf634f5..448e13c 100644
@@ -124,12 +124,12 @@ class SplitLayerInsertionTest : public ::testing::Test {
         output_param_string, &expected_output_param));
     NetParameter actual_output_param;
     InsertSplits(input_param, &actual_output_param);
-    EXPECT_EQ(expected_output_param.DebugString(),
+    CHECK_EQ(expected_output_param.DebugString(),
         actual_output_param.DebugString());
     // Also test idempotence.
     NetParameter double_split_insert_param;
     InsertSplits(actual_output_param, &double_split_insert_param);
-    EXPECT_EQ(actual_output_param.DebugString(),
+    CHECK_EQ(actual_output_param.DebugString(),
         double_split_insert_param.DebugString());
   }
 };
@@ -546,6 +546,135 @@ TEST_F(SplitLayerInsertionTest, TestNoInsertionWithInPlace) {
   this->RunInsertionTest(input_proto, input_proto);
 }
 
+TEST_F(SplitLayerInsertionTest, TestLossInsertion) {
+  const string& input_proto =
+      "name: 'UnsharedWeightsNetwork' "
+      "force_backward: true "
+      "layers: { "
+      "  name: 'data' "
+      "  type: DUMMY_DATA "
+      "  dummy_data_param { "
+      "    num: 5 "
+      "    channels: 2 "
+      "    height: 3 "
+      "    width: 4 "
+      "    data_filler { "
+      "      type: 'gaussian' "
+      "      std: 0.01 "
+      "    } "
+      "  } "
+      "  top: 'data' "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct1' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights1' "
+      "  bottom: 'data' "
+      "  top: 'innerproduct1' "
+      "  loss_weight: 2.5 "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct2' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights2' "
+      "  bottom: 'data' "
+      "  top: 'innerproduct2' "
+      "} "
+      "layers: { "
+      "  name: 'loss' "
+      "  type: EUCLIDEAN_LOSS "
+      "  bottom: 'innerproduct1' "
+      "  bottom: 'innerproduct2' "
+      "} ";
+  const string& expected_output_proto =
+      "name: 'UnsharedWeightsNetwork' "
+      "force_backward: true "
+      "layers: { "
+      "  name: 'data' "
+      "  type: DUMMY_DATA "
+      "  dummy_data_param { "
+      "    num: 5 "
+      "    channels: 2 "
+      "    height: 3 "
+      "    width: 4 "
+      "    data_filler { "
+      "      type: 'gaussian' "
+      "      std: 0.01 "
+      "    } "
+      "  } "
+      "  top: 'data' "
+      "} "
+      "layers: { "
+      "  name: 'data_data_0_split' "
+      "  type: SPLIT "
+      "  bottom: 'data' "
+      "  top: 'data' "
+      "  top: 'data_data_0_split_1' "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct1' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights1' "
+      "  bottom: 'data' "
+      "  top: 'innerproduct1' "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct1_innerproduct1_0_split' "
+      "  type: SPLIT "
+      "  bottom: 'innerproduct1' "
+      "  top: 'innerproduct1' "
+      "  top: 'innerproduct1_innerproduct1_0_split_1' "
+      "  loss_weight: 2.5 "
+      "  loss_weight: 0 "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct2' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights2' "
+      "  bottom: 'data_data_0_split_1' "
+      "  top: 'innerproduct2' "
+      "} "
+      "layers: { "
+      "  name: 'loss' "
+      "  type: EUCLIDEAN_LOSS "
+      "  bottom: 'innerproduct1_innerproduct1_0_split_1' "
+      "  bottom: 'innerproduct2' "
+      "} ";
+  this->RunInsertionTest(input_proto, expected_output_proto);
+}
+
 TEST_F(SplitLayerInsertionTest, TestInsertion) {
   const string& input_proto =
       "name: 'TestNetwork' "
diff --git a/src/caffe/util/insert_splits.cpp b/src/caffe/util/insert_splits.cpp
index 270568c..e917e35 100644
@@ -1,3 +1,4 @@
+#include <algorithm>
 #include <map>
 #include <sstream>
 #include <string>
@@ -15,6 +16,7 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
   map<string, pair<int, int> > blob_name_to_last_top_idx;
   map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx;
   map<pair<int, int>, int> top_idx_to_bottom_count;
+  map<pair<int, int>, float> top_idx_to_loss_weight;
   map<pair<int, int>, int> top_idx_to_bottom_split_idx;
   map<int, string> layer_idx_to_layer_name;
   layer_idx_to_layer_name[-1] = "input";
@@ -41,6 +43,18 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
       const string& blob_name = layer_param.top(j);
       blob_name_to_last_top_idx[blob_name] = make_pair(i, j);
     }
+    // A use of a top blob as a loss should be handled similarly to the use of
+    // a top blob as an input (bottom) blob to another layer.
+    const int last_loss =
+        std::min(layer_param.loss_weight_size(), layer_param.top_size());
+    for (int j = 0; j < last_loss; ++j) {
+      const string& blob_name = layer_param.top(j);
+      const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name];
+      top_idx_to_loss_weight[top_idx] = layer_param.loss_weight(j);
+      if (top_idx_to_loss_weight[top_idx]) {
+        ++top_idx_to_bottom_count[top_idx];
+      }
+    }
   }
   // Create split layer for any input blobs used by other layers as bottom
   // blobs more than once.
@@ -50,8 +64,9 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
       const string& layer_name = layer_idx_to_layer_name[-1];
       const string& blob_name = param.input(i);
       LayerParameter* split_layer_param = param_split->add_layers();
+      const float kZeroLossWeight = 0;
       ConfigureSplitLayer(layer_name, blob_name, i, split_count,
-          split_layer_param);
+          kZeroLossWeight, split_layer_param);
     }
   }
   for (int i = 0; i < param.layers_size(); ++i) {
@@ -72,20 +87,26 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
     // Create split layer for any top blobs used by other layers as bottom
     // blobs more than once.
     for (int j = 0; j < layer_param->top_size(); ++j) {
-      const int split_count = top_idx_to_bottom_count[make_pair(i, j)];
+      const pair<int, int>& top_idx = make_pair(i, j);
+      const int split_count = top_idx_to_bottom_count[top_idx];
       if (split_count > 1) {
         const string& layer_name = layer_idx_to_layer_name[i];
         const string& blob_name = layer_param->top(j);
         LayerParameter* split_layer_param = param_split->add_layers();
+        const float loss_weight = top_idx_to_loss_weight[top_idx];
         ConfigureSplitLayer(layer_name, blob_name, j, split_count,
-            split_layer_param);
+            loss_weight, split_layer_param);
+        if (loss_weight) {
+          layer_param->clear_loss_weight();
+          top_idx_to_bottom_split_idx[top_idx]++;
+        }
       }
     }
   }
 }
 
 void ConfigureSplitLayer(const string& layer_name, const string& blob_name,
-    const int blob_idx, const int split_count,
+    const int blob_idx, const int split_count, const float loss_weight,
     LayerParameter* split_layer_param) {
   split_layer_param->Clear();
   split_layer_param->add_bottom(blob_name);
@@ -94,6 +115,13 @@ void ConfigureSplitLayer(const string& layer_name, const string& blob_name,
   for (int k = 0; k < split_count; ++k) {
     split_layer_param->add_top(
         SplitBlobName(layer_name, blob_name, blob_idx, k));
+    if (loss_weight) {
+      if (k == 0) {
+        split_layer_param->add_loss_weight(loss_weight);
+      } else {
+        split_layer_param->add_loss_weight(0);
+      }
+    }
   }
 }
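
To summarize the bookkeeping added above: a nonzero loss_weight on a top blob
is counted as one extra consumer of that blob, so a top that is both a loss and
a bottom of some other layer crosses the split_count > 1 threshold, gets a
SPLIT layer inserted, and the weight moves onto that split's first top. A toy
model of just the counting, in plain C++ with illustrative names and no Caffe
types:

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
      // Consumers per top blob: real bottom uses, plus one synthetic "loss use"
      // for a nonzero loss_weight, mirroring the loop added to InsertSplits.
      std::map<std::string, int> bottom_count;
      std::map<std::string, float> loss_weight;

      bottom_count["innerproduct1"] = 1;    // consumed by the EUCLIDEAN_LOSS layer
      loss_weight["innerproduct1"] = 2.5f;  // also declared a loss via loss_weight

      if (loss_weight["innerproduct1"] != 0) {
        ++bottom_count["innerproduct1"];    // the loss use counts as a consumer
      }

      // split_count == 2 (> 1), so a SPLIT layer is inserted; the 2.5 moves onto
      // the split's first top and the original layer's loss_weight is cleared.
      std::cout << "split_count = " << bottom_count["innerproduct1"] << std::endl;
      return 0;
    }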