From 6176f5bad668deda6bae32a772aa8775603e9a27 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Fri, 11 Jul 2014 03:17:53 -0700
Subject: [PATCH] Make multiple losses work by inserting split layers and add
 some tests for it.

Test that we can call backward with an ACCURACY layer. This currently
fails, but should be possible now that we explicitly associate a loss
weight with each top blob.
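With this change a layer can mark any of its top blobs as a loss simply by
giving it a non-zero loss_weight, even when that blob also feeds other
layers; InsertSplits then carries the weight on the first top of the
generated SPLIT layer (and zero on the others). A minimal sketch of such a
layer, following the protos used in the new tests:

    layers: {
      name: 'innerproduct1'
      type: INNER_PRODUCT
      bottom: 'data'
      top: 'innerproduct1'
      loss_weight: 2.5
    }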
---
 include/caffe/util/insert_splits.hpp |   2 +-
 src/caffe/test/test_net.cpp          | 236 ++++++++++++++++++++++++++++++++---
 src/caffe/test/test_split_layer.cpp  | 133 +++++++++++++++++++-
 src/caffe/util/insert_splits.cpp     |  36 +++++-
 4 files changed, 385 insertions(+), 22 deletions(-)

diff --git a/include/caffe/util/insert_splits.hpp b/include/caffe/util/insert_splits.hpp
index 4ca933b..446abb8 100644
--- a/include/caffe/util/insert_splits.hpp
+++ b/include/caffe/util/insert_splits.hpp
@@ -12,7 +12,7 @@ namespace caffe {
 void InsertSplits(const NetParameter& param, NetParameter* param_split);
 
 void ConfigureSplitLayer(const string& layer_name, const string& blob_name,
-    const int blob_idx, const int split_count,
+    const int blob_idx, const int split_count, const float loss_weight,
     LayerParameter* split_layer_param);
 
 string SplitLayerName(const string& layer_name, const string& blob_name,
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 497f11d..fa5735f 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -28,7 +28,34 @@ class NetTest : public MultiDeviceTest<TypeParam> {
     net_.reset(new Net<Dtype>(param));
   }
 
-  virtual void InitTinyNet(const bool force_backward = false) {
+  virtual void CopyNetBlobs(const bool copy_diff,
+      vector<shared_ptr<Blob<Dtype> > >* blobs_copy) {
+    CHECK(net_);
+    const vector<shared_ptr<Blob<Dtype> > >& net_blobs = net_->blobs();
+    blobs_copy->clear();
+    blobs_copy->resize(net_blobs.size());
+    const bool kReshape = true;
+    for (int i = 0; i < net_blobs.size(); ++i) {
+      (*blobs_copy)[i].reset(new Blob<Dtype>());
+      (*blobs_copy)[i]->CopyFrom(*net_blobs[i], copy_diff, kReshape);
+    }
+  }
+
+  virtual void CopyNetParams(const bool copy_diff,
+      vector<shared_ptr<Blob<Dtype> > >* params_copy) {
+    CHECK(net_);
+    const vector<shared_ptr<Blob<Dtype> > >& net_params = net_->params();
+    params_copy->clear();
+    params_copy->resize(net_params.size());
+    const bool kReshape = true;
+    for (int i = 0; i < net_params.size(); ++i) {
+      (*params_copy)[i].reset(new Blob<Dtype>());
+      (*params_copy)[i]->CopyFrom(*net_params[i], copy_diff, kReshape);
+    }
+  }
+
+  virtual void InitTinyNet(const bool force_backward = false,
+      const bool accuracy_layer = false) {
     string proto =
         "name: 'TinyTestNetwork' "
         "layers: { "
@@ -79,6 +106,16 @@ class NetTest : public MultiDeviceTest<TypeParam> {
         "  bottom: 'label' "
         "  top: 'top_loss' "
         "} ";
+    if (accuracy_layer) {
+      proto +=
+          "layers: { "
+          "  name: 'loss' "
+          "  type: ACCURACY "
+          "  bottom: 'innerproduct' "
+          "  bottom: 'label' "
+          "  top: 'accuracy' "
+          "} ";
+    }
     if (force_backward) {
       proto += "force_backward: true ";
     }
@@ -606,20 +643,10 @@ TYPED_TEST(NetTest, TestLossWeight) {
   this->InitUnsharedWeightsNet(NULL, NULL, kForceBackward);
   const Dtype loss = this->net_->ForwardBackward(bottom);
   const bool kCopyDiff = true;
-  const bool kReshape = true;
-  const vector<shared_ptr<Blob<Dtype> > >& net_blobs = this->net_->blobs();
-  vector<shared_ptr<Blob<Dtype> > > blob_grads(net_blobs.size());
-  for (int i = 0; i < net_blobs.size(); ++i) {
-    blob_grads[i].reset(new Blob<Dtype>());
-    blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
-  }
-  const vector<shared_ptr<Blob<Dtype> > >& net_params =
-      this->net_->params();
-  vector<shared_ptr<Blob<Dtype> > > param_grads(net_params.size());
-  for (int i = 0; i < net_params.size(); ++i) {
-    param_grads[i].reset(new Blob<Dtype>());
-    param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
-  }
+  vector<shared_ptr<Blob<Dtype> > > blob_grads;
+  this->CopyNetBlobs(kCopyDiff, &blob_grads);
+  vector<shared_ptr<Blob<Dtype> > > param_grads;
+  this->CopyNetParams(kCopyDiff, &param_grads);
   // Check that the loss is non-trivial, otherwise the test doesn't prove much.
   const Dtype kMinLossAbsValue = 1e-2;
   ASSERT_GE(fabs(loss), kMinLossAbsValue);
@@ -656,6 +683,185 @@ TYPED_TEST(NetTest, TestLossWeight) {
   }
 }
 
+TYPED_TEST(NetTest, TestLossWeightMidNet) {
+  typedef typename TypeParam::Dtype Dtype;
+  vector<Blob<Dtype>*> bottom;
+  Caffe::set_random_seed(this->seed_);
+  const bool kForceBackward = true;
+  Dtype loss_weight = 0;
+  Dtype midnet_loss_weight = 1;
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+      kForceBackward);
+  const Dtype loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  const bool kReshape = true;
+  Blob<Dtype> data_grad;
+  data_grad.CopyFrom(*this->net_->blob_by_name("data"), kCopyDiff, kReshape);
+  // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+  const Dtype kMinLossAbsValue = 1e-2;
+  ASSERT_GE(fabs(loss), kMinLossAbsValue);
+  const Dtype kErrorMargin = 1e-4;
+  const int kNumLossWeights = 6;
+  Dtype kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+  for (int i = 0; i < kNumLossWeights; ++i) {
+    Caffe::set_random_seed(this->seed_);
+    this->InitUnsharedWeightsNet(&loss_weight, &kLossWeights[i],
+        kForceBackward);
+    const Dtype weighted_loss = this->net_->ForwardBackward(bottom);
+    const Dtype error_margin = kErrorMargin * fabs(kLossWeights[i]);
+    EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+        << "loss weight = " << kLossWeights[i];
+    const shared_ptr<Blob<Dtype> >& weighted_blob =
+        this->net_->blob_by_name("data");
+    ASSERT_EQ(data_grad.count(), weighted_blob->count());
+    for (int j = 0; j < data_grad.count(); ++j) {
+      EXPECT_NEAR(data_grad.cpu_diff()[j] * kLossWeights[i],
+          weighted_blob->cpu_diff()[j], error_margin);
+    }
+  }
+}
+
+TYPED_TEST(NetTest, TestComboLossWeight) {
+  typedef typename TypeParam::Dtype Dtype;
+  vector<Blob<Dtype>*> bottom;
+  Dtype loss_weight;
+  Dtype midnet_loss_weight;
+  const bool kForceBackward = true;
+  const Dtype kErrorMargin = 1e-4;
+
+  // Get the loss and gradients with EUCLIDEAN_LOSS weight 1,
+  // INNER_PRODUCT weight 1.
+  loss_weight = 1;
+  midnet_loss_weight = 1;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+      kForceBackward);
+  const Dtype loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  vector<shared_ptr<Blob<Dtype> > > blob_grads;
+  this->CopyNetBlobs(kCopyDiff, &blob_grads);
+  vector<shared_ptr<Blob<Dtype> > > param_grads;
+  this->CopyNetParams(kCopyDiff, &param_grads);
+
+  loss_weight = 2;
+  midnet_loss_weight = 1;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+      kForceBackward);
+  const Dtype loss_main_2 = this->net_->ForwardBackward(bottom);
+  vector<shared_ptr<Blob<Dtype> > > blob_grads_loss_2;
+  this->CopyNetBlobs(kCopyDiff, &blob_grads_loss_2);
+  vector<shared_ptr<Blob<Dtype> > > param_grads_loss_2;
+  this->CopyNetParams(kCopyDiff, &param_grads_loss_2);
+
+  loss_weight = 3;
+  midnet_loss_weight = 1;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+      kForceBackward);
+  const Dtype loss_main_3 = this->net_->ForwardBackward(bottom);
+  const vector<shared_ptr<Blob<Dtype> > >& blob_grads_loss_3 =
+      this->net_->blobs();
+  ASSERT_EQ(blob_grads.size(), blob_grads_loss_3.size());
+  ASSERT_EQ(blob_grads_loss_2.size(), blob_grads_loss_3.size());
+  for (int j = 0; j < blob_grads.size(); ++j) {
+    const string& blob_name = this->net_->blob_names()[j];
+    bool grad_should_change = true;
+    if (blob_name == "innerproduct1_innerproduct1_0_split_0") {
+      grad_should_change = false;
+    }
+    ASSERT_EQ(blob_grads[j]->count(), blob_grads_loss_3[j]->count());
+    ASSERT_EQ(blob_grads_loss_2[j]->count(), blob_grads_loss_3[j]->count());
+    for (int k = 0; k < blob_grads[j]->count(); ++k) {
+      const Dtype grad_diff_2 = blob_grads_loss_2[j]->cpu_diff()[k] -
+          blob_grads[j]->cpu_diff()[k];
+      const Dtype grad_diff_3 = blob_grads_loss_3[j]->cpu_diff()[k] -
+          blob_grads[j]->cpu_diff()[k];
+      if (grad_should_change) {
+        // Test non-triviality.
+        const Dtype kMinGradDiffAbsValue = 1e-4;
+        EXPECT_GT(fabs(grad_diff_2), kMinGradDiffAbsValue) << blob_name;
+        EXPECT_NEAR(2 * grad_diff_2, grad_diff_3, kErrorMargin) << blob_name;
+      } else {
+        EXPECT_EQ(0, grad_diff_2) << blob_name;
+        EXPECT_EQ(0, grad_diff_3) << blob_name;
+      }
+    }
+  }
+
+  loss_weight = 1;
+  midnet_loss_weight = 2;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+      kForceBackward);
+  const Dtype loss_midnet_2 = this->net_->ForwardBackward(bottom);
+  this->CopyNetBlobs(kCopyDiff, &blob_grads_loss_2);
+  this->CopyNetParams(kCopyDiff, &param_grads_loss_2);
+
+  loss_weight = 1;
+  midnet_loss_weight = 3;
+  Caffe::set_random_seed(this->seed_);
+  this->InitUnsharedWeightsNet(&loss_weight, &midnet_loss_weight,
+      kForceBackward);
+  const Dtype loss_midnet_3 = this->net_->ForwardBackward(bottom);
+  const vector<shared_ptr<Blob<Dtype> > >& blob_grads_midnet_loss_3 =
+      this->net_->blobs();
+  ASSERT_EQ(blob_grads.size(), blob_grads_midnet_loss_3.size());
+  ASSERT_EQ(blob_grads_loss_2.size(), blob_grads_midnet_loss_3.size());
+  const vector<string>& blob_names = this->net_->blob_names();
+  for (int j = 0; j < blob_grads.size(); ++j) {
+    const string& blob_name = blob_names[j];
+    bool grad_should_change = false;
+    if (blob_name == "innerproduct1" ||
+        blob_name == "innerproduct1_innerproduct1_0_split_0" ||
+        blob_name == "data_data_0_split_0" || blob_name == "data") {
+      grad_should_change = true;
+    }
+    ASSERT_EQ(blob_grads[j]->count(), blob_grads_midnet_loss_3[j]->count());
+    ASSERT_EQ(blob_grads[j]->count(), blob_grads_loss_2[j]->count());
+    for (int k = 0; k < blob_grads[j]->count(); ++k) {
+      const Dtype grad_diff_2 = blob_grads_loss_2[j]->cpu_diff()[k] -
+          blob_grads[j]->cpu_diff()[k];
+      const Dtype grad_diff_3 = blob_grads_midnet_loss_3[j]->cpu_diff()[k] -
+          blob_grads[j]->cpu_diff()[k];
+      if (grad_should_change) {
+        // Test non-triviality.
+        const Dtype kMinGradDiffAbsValue = 1e-4;
+        EXPECT_GT(fabs(grad_diff_2), kMinGradDiffAbsValue) << blob_name;
+        EXPECT_NEAR(2 * grad_diff_2, grad_diff_3, kErrorMargin) << blob_name;
+      } else {
+        EXPECT_EQ(0, grad_diff_2) << blob_name;
+        EXPECT_EQ(0, grad_diff_3) << blob_name;
+      }
+    }
+  }
+
+  const Dtype kMinLossDiffAbsValue = 1e-4;
+
+  Dtype loss_diff_2 = loss_main_2 - loss;
+  // Test non-triviality.
+  EXPECT_GT(fabs(loss_diff_2), kMinLossDiffAbsValue);
+  Dtype loss_diff_3 = loss_main_3 - loss;
+  EXPECT_NEAR(2 * loss_diff_2, loss_diff_3, kErrorMargin);
+
+  loss_diff_2 = loss_midnet_2 - loss;
+  // Test non-triviality.
+  EXPECT_GT(fabs(loss_diff_2), kMinLossDiffAbsValue);
+  loss_diff_3 = loss_midnet_3 - loss;
+  EXPECT_NEAR(2 * loss_diff_2, loss_diff_3, kErrorMargin);
+}
+
+TYPED_TEST(NetTest, TestBackwardWithAccuracyLayer) {
+  typedef typename TypeParam::Dtype Dtype;
+  const bool kForceBackward = false;
+  const bool kAccuracyLayer = true;
+  this->InitTinyNet(kForceBackward, kAccuracyLayer);
+  EXPECT_TRUE(this->net_->has_blob("accuracy"));
+  vector<Blob<Dtype>*> bottom;
+  // Test that we can do Backward even though we have an ACCURACY layer.
+  this->net_->ForwardBackward(bottom);
+}
+
 TYPED_TEST(NetTest, TestUnsharedWeightsDataNet) {
   typedef typename TypeParam::Dtype Dtype;
   this->InitUnsharedWeightsNet();
diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp
index bf634f5..448e13c 100644
--- a/src/caffe/test/test_split_layer.cpp
+++ b/src/caffe/test/test_split_layer.cpp
@@ -124,12 +124,12 @@ class SplitLayerInsertionTest : public ::testing::Test {
         output_param_string, &expected_output_param));
     NetParameter actual_output_param;
     InsertSplits(input_param, &actual_output_param);
-    EXPECT_EQ(expected_output_param.DebugString(),
+    CHECK_EQ(expected_output_param.DebugString(),
         actual_output_param.DebugString());
     // Also test idempotence.
     NetParameter double_split_insert_param;
     InsertSplits(actual_output_param, &double_split_insert_param);
-    EXPECT_EQ(actual_output_param.DebugString(),
+    CHECK_EQ(actual_output_param.DebugString(),
         double_split_insert_param.DebugString());
   }
 };
@@ -546,6 +546,135 @@ TEST_F(SplitLayerInsertionTest, TestNoInsertionWithInPlace) {
   this->RunInsertionTest(input_proto, input_proto);
 }
 
+TEST_F(SplitLayerInsertionTest, TestLossInsertion) {
+  const string& input_proto =
+      "name: 'UnsharedWeightsNetwork' "
+      "force_backward: true "
+      "layers: { "
+      "  name: 'data' "
+      "  type: DUMMY_DATA "
+      "  dummy_data_param { "
+      "    num: 5 "
+      "    channels: 2 "
+      "    height: 3 "
+      "    width: 4 "
+      "    data_filler { "
+      "      type: 'gaussian' "
+      "      std: 0.01 "
+      "    } "
+      "  } "
+      "  top: 'data' "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct1' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights1' "
+      "  bottom: 'data' "
+      "  top: 'innerproduct1' "
+      "  loss_weight: 2.5 "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct2' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights2' "
+      "  bottom: 'data' "
+      "  top: 'innerproduct2' "
+      "} "
+      "layers: { "
+      "  name: 'loss' "
+      "  type: EUCLIDEAN_LOSS "
+      "  bottom: 'innerproduct1' "
+      "  bottom: 'innerproduct2' "
+      "} ";
+  const string& expected_output_proto =
+      "name: 'UnsharedWeightsNetwork' "
+      "force_backward: true "
+      "layers: { "
+      "  name: 'data' "
+      "  type: DUMMY_DATA "
+      "  dummy_data_param { "
+      "    num: 5 "
+      "    channels: 2 "
+      "    height: 3 "
+      "    width: 4 "
+      "    data_filler { "
+      "      type: 'gaussian' "
+      "      std: 0.01 "
+      "    } "
+      "  } "
+      "  top: 'data' "
+      "} "
+      "layers: { "
+      "  name: 'data_data_0_split' "
+      "  type: SPLIT "
+      "  bottom: 'data' "
+      "  top: 'data' "
+      "  top: 'data_data_0_split_1' "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct1' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights1' "
+      "  bottom: 'data' "
+      "  top: 'innerproduct1' "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct1_innerproduct1_0_split' "
+      "  type: SPLIT "
+      "  bottom: 'innerproduct1' "
+      "  top: 'innerproduct1' "
+      "  top: 'innerproduct1_innerproduct1_0_split_1' "
+      "  loss_weight: 2.5 "
+      "  loss_weight: 0 "
+      "} "
+      "layers: { "
+      "  name: 'innerproduct2' "
+      "  type: INNER_PRODUCT "
+      "  inner_product_param { "
+      "    num_output: 10 "
+      "    bias_term: false "
+      "    weight_filler { "
+      "      type: 'gaussian' "
+      "      std: 10 "
+      "    } "
+      "  } "
+      "  param: 'unsharedweights2' "
+      "  bottom: 'data_data_0_split_1' "
+      "  top: 'innerproduct2' "
+      "} "
+      "layers: { "
+      "  name: 'loss' "
+      "  type: EUCLIDEAN_LOSS "
+      "  bottom: 'innerproduct1_innerproduct1_0_split_1' "
+      "  bottom: 'innerproduct2' "
+      "} ";
+  this->RunInsertionTest(input_proto, expected_output_proto);
+}
+
 TEST_F(SplitLayerInsertionTest, TestInsertion) {
   const string& input_proto =
       "name: 'TestNetwork' "
diff --git a/src/caffe/util/insert_splits.cpp b/src/caffe/util/insert_splits.cpp
index 270568c..e917e35 100644
--- a/src/caffe/util/insert_splits.cpp
+++ b/src/caffe/util/insert_splits.cpp
@@ -1,3 +1,4 @@
+#include <algorithm>
 #include <map>
 #include <sstream>
 #include <string>
@@ -15,6 +16,7 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
   map<string, pair<int, int> > blob_name_to_last_top_idx;
   map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx;
   map<pair<int, int>, int> top_idx_to_bottom_count;
+  map<pair<int, int>, float> top_idx_to_loss_weight;
   map<pair<int, int>, int> top_idx_to_bottom_split_idx;
   map<int, string> layer_idx_to_layer_name;
   layer_idx_to_layer_name[-1] = "input";
@@ -41,6 +43,18 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
       const string& blob_name = layer_param.top(j);
       blob_name_to_last_top_idx[blob_name] = make_pair(i, j);
     }
+    // A use of a top blob as a loss should be handled similarly to the use of
+    // a top blob as an input (bottom) blob to another layer.
+    const int last_loss =
+        std::min(layer_param.loss_weight_size(), layer_param.top_size());
+    for (int j = 0; j < last_loss; ++j) {
+      const string& blob_name = layer_param.top(j);
+      const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name];
+      top_idx_to_loss_weight[top_idx] = layer_param.loss_weight(j);
+      if (top_idx_to_loss_weight[top_idx]) {
+        ++top_idx_to_bottom_count[top_idx];
+      }
+    }
   }
   // Create split layer for any input blobs used by other layers as bottom
   // blobs more than once.
@@ -50,8 +64,9 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
       const string& layer_name = layer_idx_to_layer_name[-1];
       const string& blob_name = param.input(i);
       LayerParameter* split_layer_param = param_split->add_layers();
+      const float kZeroLossWeight = 0;
       ConfigureSplitLayer(layer_name, blob_name, i, split_count,
-          split_layer_param);
+          kZeroLossWeight, split_layer_param);
     }
   }
   for (int i = 0; i < param.layers_size(); ++i) {
@@ -72,20 +87,26 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) {
     // Create split layer for any top blobs used by other layers as bottom
     // blobs more than once.
     for (int j = 0; j < layer_param->top_size(); ++j) {
-      const int split_count = top_idx_to_bottom_count[make_pair(i, j)];
+      const pair<int, int>& top_idx = make_pair(i, j);
+      const int split_count = top_idx_to_bottom_count[top_idx];
       if (split_count > 1) {
         const string& layer_name = layer_idx_to_layer_name[i];
         const string& blob_name = layer_param->top(j);
         LayerParameter* split_layer_param = param_split->add_layers();
+        const float loss_weight = top_idx_to_loss_weight[top_idx];
         ConfigureSplitLayer(layer_name, blob_name, j, split_count,
-            split_layer_param);
+            loss_weight, split_layer_param);
+        if (loss_weight) {
+          layer_param->clear_loss_weight();
+          top_idx_to_bottom_split_idx[top_idx]++;
+        }
       }
     }
   }
 }
 
 void ConfigureSplitLayer(const string& layer_name, const string& blob_name,
-    const int blob_idx, const int split_count,
+    const int blob_idx, const int split_count, const float loss_weight,
     LayerParameter* split_layer_param) {
   split_layer_param->Clear();
   split_layer_param->add_bottom(blob_name);
@@ -94,6 +115,13 @@ void ConfigureSplitLayer(const string& layer_name, const string& blob_name,
   for (int k = 0; k < split_count; ++k) {
     split_layer_param->add_top(
         SplitBlobName(layer_name, blob_name, blob_idx, k));
+    if (loss_weight) {
+      if (k == 0) {
+        split_layer_param->add_loss_weight(loss_weight);
+      } else {
+        split_layer_param->add_loss_weight(0);
+      }
+    }
   }
 }
 
-- 
2.7.4