Display averaged loss over the last several iterations

author qipeng <pengrobertqi@163.com>

Wed, 17 Sep 2014 02:59:53 +0000 (19:59 -0700)

committer qipeng <pengrobertqi@163.com>

Wed, 17 Sep 2014 02:59:53 +0000 (19:59 -0700)
author qipeng <pengrobertqi@163.com>
Wed, 17 Sep 2014 02:59:53 +0000 (19:59 -0700)
committer qipeng <pengrobertqi@163.com>
Wed, 17 Sep 2014 02:59:53 +0000 (19:59 -0700)
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto

index 493bfa7..ba4c41c 100644 (file)
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -63,7 +63,7 @@ message NetParameter {
  // NOTE
  // Update the next available ID when you add a new SolverParameter field.
  //
-// SolverParameter next available ID: 33 (last added: test_initialization)
+// SolverParameter next available ID: 34 (last added: average_loss)
  message SolverParameter {
    //////////////////////////////////////////////////////////////////////////////
    // Specifying the train and test networks
@@ -113,6 +113,8 @@ message SolverParameter {
    // the number of iterations between displaying info. If display = 0, no info
    // will be displayed.
    optional int32 display = 6;
+  // Display the loss averaged over the last average_loss iterations.
+  optional int32 average_loss = 33 [default = 1];
    optional int32 max_iter = 7; // the maximum number of iterations
    optional string lr_policy = 8; // The learning rate decay policy.
    optional float gamma = 9; // The parameter to compute the learning rate.
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp

index 0ea4edc..0810f48 100644 (file)
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -169,6 +169,13 @@ void Solver<Dtype>::Solve(const char* resume_file) {
    // resume_file above.
    const int start_iter = iter_;
  
+  int average_loss = this->param_.average_loss();
+
+  CHECK_GE(average_loss, 1) << "average_cost should be non-negative.";
+
+  vector<Dtype> losses;
+  Dtype smoothed_loss;
+
    // For a network that is trained by the solver, no bottom or top vecs
    // should be given, and we will just provide dummy vecs.
    vector<Blob<Dtype>*> bottom_vec;
@@ -187,8 +194,17 @@ void Solver<Dtype>::Solve(const char* resume_file) {
      const bool display = param_.display() && iter_ % param_.display() == 0;
      net_->set_debug_info(display && param_.debug_info());
      Dtype loss = net_->ForwardBackward(bottom_vec);
+    if (losses.size() < average_loss) {
+      losses.push_back(loss);
+      int size = losses.size();
+      smoothed_loss = (smoothed_loss * (size - 1) + loss) / size;
+    } else {
+      int idx = (iter_ - start_iter) % average_loss;
+      smoothed_loss += (loss - losses[idx]) / average_loss;
+      losses[idx] = loss;
+    }
      if (display) {
-      LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
+      LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss;
        const vector<Blob<Dtype>*>& result = net_->output_blobs();
        int score_index = 0;
        for (int j = 0; j < result.size(); ++j) {
author	qipeng <pengrobertqi@163.com>
	Wed, 17 Sep 2014 02:59:53 +0000 (19:59 -0700)
committer	qipeng <pengrobertqi@163.com>
	Wed, 17 Sep 2014 02:59:53 +0000 (19:59 -0700)
src/caffe/proto/caffe.proto		patch \| blob \| history
src/caffe/solver.cpp		patch \| blob \| history