Store loss coefficients in layer; use for prettier training output.
author     Jeff Donahue <jeff.donahue@gmail.com>
           Sun, 13 Jul 2014 23:56:42 +0000 (16:56 -0700)
committer  Jeff Donahue <jeff.donahue@gmail.com>
           Wed, 13 Aug 2014 20:22:09 +0000 (13:22 -0700)
include/caffe/net.hpp
include/caffe/solver.hpp
src/caffe/net.cpp
src/caffe/solver.cpp

diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 8c22137..cc238e7 100644
@@ -90,6 +90,9 @@ class Net {
   inline vector<vector<bool> >& bottom_need_backward() {
     return bottom_need_backward_;
   }
+  inline vector<Dtype>& blob_loss_weights() {
+    return blob_loss_weights_;
+  }
   // returns the parameters
   inline vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
   // returns the parameter learning rate multipliers
@@ -163,6 +166,9 @@ class Net {
   // top_vecs stores the vectors containing the output for each layer
   vector<vector<Blob<Dtype>*> > top_vecs_;
   vector<vector<int> > top_id_vecs_;
+  // Vector of weights in the loss (or objective) function of each net blob,
+  // indexed by blob_id.
+  vector<Dtype> blob_loss_weights_;
   vector<int> param_owners_;
   vector<string> param_display_names_;
   vector<pair<int, int> > param_layer_indices_;
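
The new blob_loss_weights() accessor exposes each blob's weight in the objective, indexed by blob id, so callers can translate raw output values into loss contributions. A minimal sketch of the lookup, assuming a constructed Net<Dtype>; LogOutputLossWeights is a hypothetical helper, not part of this commit, but it uses only accessors that appear in this diff (output_blobs(), output_blob_indices(), blob_names(), blob_loss_weights()):

    // Hypothetical helper: print the loss weight of every net output blob.
    // Mirrors the lookups Solver<Dtype>::Solve performs below.
    template <typename Dtype>
    void LogOutputLossWeights(Net<Dtype>& net) {
      for (int j = 0; j < net.output_blobs().size(); ++j) {
        const int blob_id = net.output_blob_indices()[j];
        const Dtype weight = net.blob_loss_weights()[blob_id];
        LOG(INFO) << "Output " << net.blob_names()[blob_id]
                  << " has loss weight " << weight;
      }
    }
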
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index cdf583a..9012c5d 100644
@@ -46,6 +46,7 @@ class Solver {
   // function that restores the state from a SolverState protocol buffer.
   void Restore(const char* resume_file);
   virtual void RestoreSolverState(const SolverState& state) = 0;
+  void DisplayOutputBlobs(const int net_id);
 
   SolverParameter param_;
   int iter_;
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index e70c23b..5a49a23 100644
@@ -96,11 +96,18 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
     LOG(INFO) << "Setting up " << layer_names_[layer_id];
     layers_[layer_id]->SetUp(bottom_vecs_[layer_id], &top_vecs_[layer_id]);
     for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
+      if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
+        blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
+      }
+      blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
       LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->num() << " "
           << top_vecs_[layer_id][top_id]->channels() << " "
           << top_vecs_[layer_id][top_id]->height() << " "
           << top_vecs_[layer_id][top_id]->width() << " ("
           << top_vecs_[layer_id][top_id]->count() << ")";
+      if (layer->loss(top_id)) {
+        LOG(INFO) << "    with loss weight " << layer->loss(top_id);
+      }
     }
     DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
     const int blobs_lr_size = layer_param.blobs_lr_size();
@@ -151,7 +158,7 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
     bool layer_contributes_loss = false;
     for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
       const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
-      if (layers_[layer_id]->has_loss(top_id) ||
+      if (layers_[layer_id]->loss(top_id) ||
           (blobs_under_loss.find(blob_name) != blobs_under_loss.end())) {
         layer_contributes_loss = true;
         break;
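
Note how Init grows blob_loss_weights_ on demand, so the vector stays indexed by blob id and any blob that never appears as a loss-producing top keeps the default weight Dtype(0); the extra log line is emitted only when a top actually carries loss. A hypothetical setup log excerpt under this change (layer name and shape invented for illustration):

    Setting up loss
    Top shape: 1 1 1 1 (1)
        with loss weight 1
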
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 18269b0..80582b3 100644
@@ -154,7 +154,7 @@ void Solver<Dtype>::InitTestNets() {
     }
     net_params[i].mutable_state()->CopyFrom(net_state);
     LOG(INFO)
-        << "Creating testing net (#" << i << ") specified by " << sources[i];
+        << "Creating test net (#" << i << ") specified by " << sources[i];
     test_nets_[i].reset(new Net<Dtype>(net_params[i]));
   }
 }
@@ -194,16 +194,24 @@ void Solver<Dtype>::Solve(const char* resume_file) {
     if (display) {
       LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
       const vector<Blob<Dtype>*>& result = net_->output_blobs();
-      vector<Dtype> score;
+      int score_index = 0;
       for (int j = 0; j < result.size(); ++j) {
         const Dtype* result_vec = result[j]->cpu_data();
+        const string& output_name =
+            net_->blob_names()[net_->output_blob_indices()[j]];
+        const Dtype loss_weight =
+            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
         for (int k = 0; k < result[j]->count(); ++k) {
-          score.push_back(result_vec[k]);
+          ostringstream loss_msg_stream;
+          if (loss_weight) {
+            loss_msg_stream << " (* " << loss_weight
+                            << " = " << loss_weight * result_vec[k] << " loss)";
+          }
+          LOG(INFO) << "    Train net output #"
+              << score_index++ << ": " << output_name << " = "
+              << result_vec[k] << loss_msg_stream.str();
         }
       }
-      for (int i = 0; i < score.size(); ++i) {
-        LOG(INFO) << "    Training score #" << i << ": " << score[i];
-      }
     }
 
     ComputeUpdateValue();
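
The training display now names each output blob and, when the blob carries a nonzero loss weight, shows its weighted contribution to the total, replacing the anonymous "Training score #i" lines. A hypothetical iteration log under this change (values invented; assumes a net with an accuracy output and a loss output with loss weight 1):

    Iteration 100, loss = 1.9046
        Train net output #0: accuracy = 0.32
        Train net output #1: loss = 1.9046 (* 1 = 1.9046 loss)
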
@@ -247,12 +255,14 @@ void Solver<Dtype>::Test(const int test_net_id) {
   CHECK_NOTNULL(test_nets_[test_net_id].get())->
       ShareTrainedLayersWith(net_.get());
   vector<Dtype> test_score;
+  vector<int> test_score_output_id;
   vector<Blob<Dtype>*> bottom_vec;
+  const shared_ptr<Net<Dtype> >& test_net = test_nets_[test_net_id];
   Dtype loss = 0;
   for (int i = 0; i < param_.test_iter(test_net_id); ++i) {
     Dtype iter_loss;
     const vector<Blob<Dtype>*>& result =
-        test_nets_[test_net_id]->Forward(bottom_vec, &iter_loss);
+        test_net->Forward(bottom_vec, &iter_loss);
     if (param_.test_compute_loss()) {
       loss += iter_loss;
     }
@@ -261,6 +271,7 @@ void Solver<Dtype>::Test(const int test_net_id) {
         const Dtype* result_vec = result[j]->cpu_data();
         for (int k = 0; k < result[j]->count(); ++k) {
           test_score.push_back(result_vec[k]);
+          test_score_output_id.push_back(j);
         }
       }
     } else {
@@ -278,8 +289,18 @@ void Solver<Dtype>::Test(const int test_net_id) {
     LOG(INFO) << "Test loss: " << loss;
   }
   for (int i = 0; i < test_score.size(); ++i) {
-    LOG(INFO) << "    Test score #" << i << ": "
-        << test_score[i] / param_.test_iter(test_net_id);
+    const string& output_name = test_net->blob_names()[
+        test_net->output_blob_indices()[test_score_output_id[i]]];
+    const Dtype loss_weight = test_net->blob_loss_weights()[
+        test_net->output_blob_indices()[test_score_output_id[i]]];
+    ostringstream loss_msg_stream;
+    const Dtype mean_score = test_score[i] / param_.test_iter(test_net_id);
+    if (loss_weight) {
+      loss_msg_stream << " (* " << loss_weight
+                      << " = " << loss_weight * mean_score << " loss)";
+    }
+    LOG(INFO) << "    Test net output #" << i << ": " << output_name << " = "
+        << mean_score << loss_msg_stream.str();
   }
   Caffe::set_phase(Caffe::TRAIN);
 }
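
Test output gets the same treatment: test_score_output_id records which output blob each accumulated value came from, each score is averaged over test_iter(test_net_id) iterations, and the mean is printed with its blob name and weighted loss contribution. A hypothetical test log excerpt (values invented):

    Test net output #0: accuracy = 0.9791
    Test net output #1: loss = 0.0712 (* 1 = 0.0712 loss)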