From 0a6870339afce63d0ca838f312248bf049791dc6 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 24 Apr 2014 21:44:48 -0700
Subject: [PATCH] move analytic gradient computation outside loop and store -- saves a lot of time

---
 include/caffe/blob.hpp                      |  1 +
 src/caffe/blob.cpp                          |  5 ++++
 src/caffe/test/test_gradient_check_util.hpp | 43 ++++++++++++++++++-----------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp
index 712fc05..2f6b8f8 100644
--- a/include/caffe/blob.hpp
+++ b/include/caffe/blob.hpp
@@ -19,6 +19,7 @@ class Blob {
       const int width);
   void Reshape(const int num, const int channels, const int height,
       const int width);
+  void ReshapeLike(const Blob& other);
   inline int num() const { return num_; }
   inline int channels() const { return channels_; }
   inline int height() const { return height_; }
diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp
index 54b6992..f1fe98d 100644
--- a/src/caffe/blob.cpp
+++ b/src/caffe/blob.cpp
@@ -32,6 +32,11 @@ void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
 }
 
 template <typename Dtype>
+void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
+  Reshape(other.num(), other.channels(), other.height(), other.width());
+}
+
+template <typename Dtype>
 Blob<Dtype>::Blob(const int num, const int channels, const int height,
     const int width) {
   Reshape(num, channels, height, width);
diff --git a/src/caffe/test/test_gradient_check_util.hpp b/src/caffe/test/test_gradient_check_util.hpp
index 1975886..a1c66a1 100644
--- a/src/caffe/test/test_gradient_check_util.hpp
+++ b/src/caffe/test/test_gradient_check_util.hpp
@@ -62,9 +62,6 @@ class GradientChecker {
 };
 
 
-// Detailed implementations are as follows.
-
-
 template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
@@ -82,36 +79,50 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     CHECK(check_bottom < bottom->size());
     blobs_to_check.push_back((*bottom)[check_bottom]);
   }
-  // go through the bottom and parameter blobs
+  // Compute the gradient analytically using Backward
+  Caffe::set_random_seed(seed_);
+  // Get any loss from the layer
+  Dtype computed_objective = layer->Forward(*bottom, top);
+  // Get additional loss from the objective
+  computed_objective += GetObjAndGradient(top, top_id, top_data_id);
+  layer->Backward(*top, true, bottom);
+  // Store computed gradients for all checked blobs
+  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(blobs_to_check.size());
+  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
+    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
+    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
+    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
+    const int count = blobs_to_check[blob_id]->count();
+    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
+    Dtype* computed_gradients =
+        computed_gradient_blobs[blob_id]->mutable_cpu_data();
+    caffe_copy(count, diff, computed_gradients);
+  }
+  // Compute derivative of top w.r.t. each bottom and parameter input using
+  // finite differencing.
// LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs."; for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) { Blob* current_blob = blobs_to_check[blob_id]; + const Dtype* computed_gradients = + computed_gradient_blobs[blob_id]->cpu_data(); // LOG(ERROR) << "Blob " << blob_id << ": checking " // << current_blob->count() << " parameters."; - // go through the values for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) { - // First, obtain the original data - Caffe::set_random_seed(seed_); - // Get any loss from the layer - Dtype computed_objective = layer->Forward(*bottom, top); - // Get additional loss from the objective - computed_objective += GetObjAndGradient(top, top_id, top_data_id); - layer->Backward(*top, true, bottom); - Dtype computed_gradient = current_blob->cpu_diff()[feat_id]; - // compute score by adding stepsize + // Compute loss with stepsize_ added to input. current_blob->mutable_cpu_data()[feat_id] += stepsize_; Caffe::set_random_seed(seed_); Dtype positive_objective = layer->Forward(*bottom, top); positive_objective += GetObjAndGradient(top, top_id, top_data_id); - // compute score by subtracting stepsize + // Compute loss with stepsize_ subtracted from input. current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2; Caffe::set_random_seed(seed_); Dtype negative_objective = layer->Forward(*bottom, top); negative_objective += GetObjAndGradient(top, top_id, top_data_id); - // Recover stepsize + // Recover original input value. current_blob->mutable_cpu_data()[feat_id] += stepsize_; Dtype estimated_gradient = (positive_objective - negative_objective) / stepsize_ / 2.; + Dtype computed_gradient = computed_gradients[feat_id]; Dtype feature = current_blob->cpu_data()[feat_id]; // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " " // << current_blob->cpu_diff()[feat_id]; -- 2.7.4