move analytic gradient computation outside loop and store -- saves a lot of time
author     Jeff Donahue <jeff.donahue@gmail.com>
           Fri, 25 Apr 2014 04:44:48 +0000 (21:44 -0700)
committer  Jeff Donahue <jeff.donahue@gmail.com>
           Fri, 25 Apr 2014 17:33:08 +0000 (10:33 -0700)

include/caffe/blob.hpp
src/caffe/blob.cpp
src/caffe/test/test_gradient_check_util.hpp
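
The refactor in test_gradient_check_util.hpp caches the analytic gradient from a single Forward/Backward pass, then inside the per-element loop only re-evaluates the objective to form the central-difference estimate (f(x + h) - f(x - h)) / (2h). A minimal self-contained sketch of that pattern, assuming nothing from the Caffe API (the toy Objective/AnalyticGradient functions below are illustrative only):

#include <cstdio>
#include <vector>

// Toy objective f(x) = 0.5 * sum_i x_i^2, whose analytic gradient is df/dx_i = x_i.
double Objective(const std::vector<double>& x) {
  double f = 0.0;
  for (int i = 0; i < static_cast<int>(x.size()); ++i) f += 0.5 * x[i] * x[i];
  return f;
}

std::vector<double> AnalyticGradient(const std::vector<double>& x) {
  return x;  // df/dx_i = x_i for this objective
}

int main() {
  std::vector<double> x;
  x.push_back(1.0); x.push_back(-2.0); x.push_back(3.5);
  const double h = 1e-3;
  // Expensive analytic pass done once, outside the loop, and cached --
  // analogous to running Forward/Backward once and copying the diffs.
  const std::vector<double> cached_gradient = AnalyticGradient(x);
  for (int i = 0; i < static_cast<int>(x.size()); ++i) {
    // Cheap per-element work: central-difference estimate of df/dx_i.
    x[i] += h;
    const double positive = Objective(x);
    x[i] -= 2 * h;
    const double negative = Objective(x);
    x[i] += h;  // restore the original value
    const double estimated = (positive - negative) / (2 * h);
    std::printf("dim %d: analytic %.6f, estimated %.6f\n",
                i, cached_gradient[i], estimated);
  }
  return 0;
}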

include/caffe/blob.hpp
index 712fc05..2f6b8f8 100644 (file)
@@ -19,6 +19,7 @@ class Blob {
     const int width);
   void Reshape(const int num, const int channels, const int height,
     const int width);
+  void ReshapeLike(const Blob& other);
   inline int num() const { return num_; }
   inline int channels() const { return channels_; }
   inline int height() const { return height_; }

src/caffe/blob.cpp
index 54b6992..f1fe98d 100644 (file)
@@ -32,6 +32,11 @@ void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
 }
 
 template <typename Dtype>
+void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
+  Reshape(other.num(), other.channels(), other.height(), other.width());
+}
+
+template <typename Dtype>
 Blob<Dtype>::Blob(const int num, const int channels, const int height,
     const int width) {
   Reshape(num, channels, height, width);
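
The checker below relies on this new helper to size its gradient-cache blobs. A minimal usage sketch (illustrative only, not part of this diff; current_blob stands for any existing blob):

Blob<float> cache;
cache.ReshapeLike(*current_blob);  // allocate with the same num/channels/height/width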

src/caffe/test/test_gradient_check_util.hpp
index 1975886..a1c66a1 100644 (file)
@@ -62,9 +62,6 @@ class GradientChecker {
 };
 
 
-// Detailed implementations are as follows.
-
-
 template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
@@ -82,36 +79,50 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     CHECK(check_bottom < bottom->size());
     blobs_to_check.push_back((*bottom)[check_bottom]);
   }
-  // go through the bottom and parameter blobs
+  // Compute the gradient analytically using Backward
+  Caffe::set_random_seed(seed_);
+  // Get any loss from the layer
+  Dtype computed_objective = layer->Forward(*bottom, top);
+  // Get additional loss from the objective
+  computed_objective += GetObjAndGradient(top, top_id, top_data_id);
+  layer->Backward(*top, true, bottom);
+  // Store computed gradients for all checked blobs
+  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(blobs_to_check.size());
+  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
+    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
+    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
+    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
+    const int count = blobs_to_check[blob_id]->count();
+    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
+    Dtype* computed_gradients =
+        computed_gradient_blobs[blob_id]->mutable_cpu_data();
+    caffe_copy(count, diff, computed_gradients);
+  }
+  // Compute derivative of top w.r.t. each bottom and parameter input using
+  // finite differencing.
   // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
   for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
     Blob<Dtype>* current_blob = blobs_to_check[blob_id];
+    const Dtype* computed_gradients =
+        computed_gradient_blobs[blob_id]->cpu_data();
     // LOG(ERROR) << "Blob " << blob_id << ": checking "
     //     << current_blob->count() << " parameters.";
-    // go through the values
     for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
-      // First, obtain the original data
-      Caffe::set_random_seed(seed_);
-      // Get any loss from the layer
-      Dtype computed_objective = layer->Forward(*bottom, top);
-      // Get additional loss from the objective
-      computed_objective += GetObjAndGradient(top, top_id, top_data_id);
-      layer->Backward(*top, true, bottom);
-      Dtype computed_gradient = current_blob->cpu_diff()[feat_id];
-      // compute score by adding stepsize
+      // Compute loss with stepsize_ added to input.
       current_blob->mutable_cpu_data()[feat_id] += stepsize_;
       Caffe::set_random_seed(seed_);
       Dtype positive_objective = layer->Forward(*bottom, top);
       positive_objective += GetObjAndGradient(top, top_id, top_data_id);
-      // compute score by subtracting stepsize
+      // Compute loss with stepsize_ subtracted from input.
       current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
       Caffe::set_random_seed(seed_);
       Dtype negative_objective = layer->Forward(*bottom, top);
       negative_objective += GetObjAndGradient(top, top_id, top_data_id);
-      // Recover stepsize
+      // Recover original input value.
       current_blob->mutable_cpu_data()[feat_id] += stepsize_;
       Dtype estimated_gradient = (positive_objective - negative_objective) /
           stepsize_ / 2.;
+      Dtype computed_gradient = computed_gradients[feat_id];
       Dtype feature = current_blob->cpu_data()[feat_id];
       // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
       //     << current_blob->cpu_diff()[feat_id];