From 0a6870339afce63d0ca838f312248bf049791dc6 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 24 Apr 2014 21:44:48 -0700
Subject: [PATCH] move analytic gradient computation outside loop and store -- saves a lot of time

---
 include/caffe/blob.hpp                      |  1 +
 src/caffe/blob.cpp                          |  5 ++++
 src/caffe/test/test_gradient_check_util.hpp | 43 ++++++++++++++++++-----------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp
index 712fc05..2f6b8f8 100644
--- a/include/caffe/blob.hpp
+++ b/include/caffe/blob.hpp
@@ -19,6 +19,7 @@ class Blob {
       const int width);
   void Reshape(const int num, const int channels, const int height,
       const int width);
+  void ReshapeLike(const Blob& other);
   inline int num() const { return num_; }
   inline int channels() const { return channels_; }
   inline int height() const { return height_; }
diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp
index 54b6992..f1fe98d 100644
--- a/src/caffe/blob.cpp
+++ b/src/caffe/blob.cpp
@@ -32,6 +32,11 @@ void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
 }
 
 template <typename Dtype>
+void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
+  Reshape(other.num(), other.channels(), other.height(), other.width());
+}
+
+template <typename Dtype>
 Blob<Dtype>::Blob(const int num, const int channels, const int height,
     const int width) {
   Reshape(num, channels, height, width);
diff --git a/src/caffe/test/test_gradient_check_util.hpp b/src/caffe/test/test_gradient_check_util.hpp
index 1975886..a1c66a1 100644
--- a/src/caffe/test/test_gradient_check_util.hpp
+++ b/src/caffe/test/test_gradient_check_util.hpp
@@ -62,9 +62,6 @@ class GradientChecker {
 };
 
 
-// Detailed implementations are as follows.
-
-
 template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
@@ -82,36 +79,50 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     CHECK(check_bottom < bottom->size());
     blobs_to_check.push_back((*bottom)[check_bottom]);
   }
-  // go through the bottom and parameter blobs
+  // Compute the gradient analytically using Backward
+  Caffe::set_random_seed(seed_);
+  // Get any loss from the layer
+  Dtype computed_objective = layer->Forward(*bottom, top);
+  // Get additional loss from the objective
+  computed_objective += GetObjAndGradient(top, top_id, top_data_id);
+  layer->Backward(*top, true, bottom);
+  // Store computed gradients for all checked blobs
+  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(blobs_to_check.size());
+  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
+    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
+    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
+    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
+    const int count = blobs_to_check[blob_id]->count();
+    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
+    Dtype* computed_gradients =
+        computed_gradient_blobs[blob_id]->mutable_cpu_data();
+    caffe_copy(count, diff, computed_gradients);
+  }
+  // Compute derivative of top w.r.t. each bottom and parameter input using
+  // finite differencing.
// LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs."; for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) { Blob* current_blob = blobs_to_check[blob_id]; + const Dtype* computed_gradients = + computed_gradient_blobs[blob_id]->cpu_data(); // LOG(ERROR) << "Blob " << blob_id << ": checking " // << current_blob->count() << " parameters."; - // go through the values for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) { - // First, obtain the original data - Caffe::set_random_seed(seed_); - // Get any loss from the layer - Dtype computed_objective = layer->Forward(*bottom, top); - // Get additional loss from the objective - computed_objective += GetObjAndGradient(top, top_id, top_data_id); - layer->Backward(*top, true, bottom); - Dtype computed_gradient = current_blob->cpu_diff()[feat_id]; - // compute score by adding stepsize + // Compute loss with stepsize_ added to input. current_blob->mutable_cpu_data()[feat_id] += stepsize_; Caffe::set_random_seed(seed_); Dtype positive_objective = layer->Forward(*bottom, top); positive_objective += GetObjAndGradient(top, top_id, top_data_id); - // compute score by subtracting stepsize + // Compute loss with stepsize_ subtracted from input. current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2; Caffe::set_random_seed(seed_); Dtype negative_objective = layer->Forward(*bottom, top); negative_objective += GetObjAndGradient(top, top_id, top_data_id); - // Recover stepsize + // Recover original input value. current_blob->mutable_cpu_data()[feat_id] += stepsize_; Dtype estimated_gradient = (positive_objective - negative_objective) / stepsize_ / 2.; + Dtype computed_gradient = computed_gradients[feat_id]; Dtype feature = current_blob->cpu_data()[feat_id]; // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " " // << current_blob->cpu_diff()[feat_id]; -- 2.7.4