From 2c5f9dd7e2c8d7b2b130ec001a3f066ead8682f4 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 24 Apr 2014 20:24:19 -0700
Subject: [PATCH] eltwise gradient checker

---
 src/caffe/test/test_eltwise_product_layer.cpp |  4 +-
 src/caffe/test/test_flatten_layer.cpp         |  4 +-
 src/caffe/test/test_gradient_check_util.hpp   | 73 ++++++++++++++++++++-------
 src/caffe/test/test_neuron_layer.cpp          | 14 ++---
 src/caffe/test/test_power_layer.cpp           |  2 +-
 src/caffe/test/test_split_layer.cpp           |  8 +--
 src/caffe/test/test_tanh_layer.cpp            |  4 +-
 7 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/src/caffe/test/test_eltwise_product_layer.cpp b/src/caffe/test/test_eltwise_product_layer.cpp
index 8255a57..86d6fdc 100644
--- a/src/caffe/test/test_eltwise_product_layer.cpp
+++ b/src/caffe/test/test_eltwise_product_layer.cpp
@@ -102,7 +102,7 @@ TYPED_TEST(EltwiseProductLayerTest, TestCPUGradient) {
   LayerParameter layer_param;
   EltwiseProductLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -111,7 +111,7 @@ TYPED_TEST(EltwiseProductLayerTest, TestGPUGradient) {
   LayerParameter layer_param;
   EltwiseProductLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
diff --git a/src/caffe/test/test_flatten_layer.cpp b/src/caffe/test/test_flatten_layer.cpp
index 139488b..52c567b 100644
--- a/src/caffe/test/test_flatten_layer.cpp
+++ b/src/caffe/test/test_flatten_layer.cpp
@@ -84,7 +84,7 @@ TYPED_TEST(FlattenLayerTest, TestCPUGradient) {
   Caffe::set_mode(Caffe::CPU);
   FlattenLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -93,7 +93,7 @@ TYPED_TEST(FlattenLayerTest, TestGPUGradient) {
   Caffe::set_mode(Caffe::GPU);
   FlattenLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
diff --git a/src/caffe/test/test_gradient_check_util.hpp b/src/caffe/test/test_gradient_check_util.hpp
index a1c66a1..da54a96 100644
--- a/src/caffe/test/test_gradient_check_util.hpp
+++ b/src/caffe/test/test_gradient_check_util.hpp
@@ -40,9 +40,15 @@ class GradientChecker {
       vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
       int check_bottom = -1);
 
+  // CheckGradientEltwise can be used to test layers that perform element-wise
+  // computation only (e.g., neuron layers) -- where (d y_i) / (d x_j) = 0 when
+  // i != j.
+  void CheckGradientEltwise(Layer<Dtype>* layer,
+      vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top);
+
   void CheckGradientSingle(Layer<Dtype>* layer, vector<Blob<Dtype>*>* bottom,
       vector<Blob<Dtype>*>* top, int check_bottom, int top_id,
-      int top_data_id);
+      int top_data_id, bool element_wise = false);
 
   // Checks the gradient of a network. This network should not have any data
   // layers or loss layers, since the function does not explicitly deal with
@@ -65,7 +71,16 @@ class GradientChecker {
 template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
-    int check_bottom, int top_id, int top_data_id) {
+    int check_bottom, int top_id, int top_data_id, bool element_wise) {
+  if (element_wise) {
+    CHECK_EQ(0, layer->blobs().size());
+    CHECK_LE(0, top_id);
+    CHECK_LE(0, top_data_id);
+    const int top_count = (*top)[top_id]->count();
+    for (int blob_id = 0; blob_id < bottom->size(); ++blob_id) {
+      CHECK_EQ(top_count, (*bottom)[blob_id]->count());
+    }
+  }
   // First, figure out what blobs we need to check against.
   vector<Blob<Dtype>*> blobs_to_check;
   for (int i = 0; i < layer->blobs().size(); ++i) {
@@ -87,7 +102,8 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
   computed_objective += GetObjAndGradient(top, top_id, top_data_id);
   layer->Backward(*top, true, bottom);
   // Store computed gradients for all checked blobs
-  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(blobs_to_check.size());
+  vector<shared_ptr<Blob<Dtype> > >
+      computed_gradient_blobs(blobs_to_check.size());
   for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
     Blob<Dtype>* current_blob = blobs_to_check[blob_id];
     computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
@@ -108,20 +124,29 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     // LOG(ERROR) << "Blob " << blob_id << ": checking "
     //     << current_blob->count() << " parameters.";
     for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
-      // Compute loss with stepsize_ added to input.
-      current_blob->mutable_cpu_data()[feat_id] += stepsize_;
-      Caffe::set_random_seed(seed_);
-      Dtype positive_objective = layer->Forward(*bottom, top);
-      positive_objective += GetObjAndGradient(top, top_id, top_data_id);
-      // Compute loss with stepsize_ subtracted from input.
-      current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
-      Caffe::set_random_seed(seed_);
-      Dtype negative_objective = layer->Forward(*bottom, top);
-      negative_objective += GetObjAndGradient(top, top_id, top_data_id);
-      // Recover original input value.
-      current_blob->mutable_cpu_data()[feat_id] += stepsize_;
-      Dtype estimated_gradient = (positive_objective - negative_objective) /
-          stepsize_ / 2.;
+      // For an element-wise layer, we only need to do finite differencing to
+      // compute the derivative of (*top)[top_id][top_data_id] w.r.t.
+      // (*bottom)[blob_id][i] for i == top_data_id. For any other
+      // i != top_data_id, we know the derivative is 0 by definition, and
+      // simply check that it is.
+      Dtype estimated_gradient = 0;
+      if (!element_wise || (feat_id == top_data_id)) {
+        // Do finite differencing.
+        // Compute loss with stepsize_ added to input.
+        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
+        Caffe::set_random_seed(seed_);
+        Dtype positive_objective = layer->Forward(*bottom, top);
+        positive_objective += GetObjAndGradient(top, top_id, top_data_id);
+        // Compute loss with stepsize_ subtracted from input.
+        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
+        Caffe::set_random_seed(seed_);
+        Dtype negative_objective = layer->Forward(*bottom, top);
+        negative_objective += GetObjAndGradient(top, top_id, top_data_id);
+        // Recover original input value.
+        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
+        estimated_gradient = (positive_objective - negative_objective) /
+            stepsize_ / 2.;
+      }
       Dtype computed_gradient = computed_gradients[feat_id];
       Dtype feature = current_blob->cpu_data()[feat_id];
       // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
@@ -158,6 +183,20 @@ void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>* layer,
   }
 }
 template <typename Dtype>
+void GradientChecker<Dtype>::CheckGradientEltwise(Layer<Dtype>* layer,
+    vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top) {
+  layer->SetUp(*bottom, top);
+  CHECK_GT(top->size(), 0) << "Eltwise mode requires at least one top blob.";
+  const int check_bottom = -1;
+  const bool element_wise = true;
+  for (int i = 0; i < top->size(); ++i) {
+    for (int j = 0; j < (*top)[i]->count(); ++j) {
+      CheckGradientSingle(layer, bottom, top, check_bottom, i, j, element_wise);
+    }
+  }
+}
+
+template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientNet(
     const Net<Dtype>& net, const vector<Blob<Dtype>*>& input) {
   const vector<shared_ptr<Layer<Dtype> > >& layers = net.layers();
diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp
index cd73375..9c852a1 100644
--- a/src/caffe/test/test_neuron_layer.cpp
+++ b/src/caffe/test/test_neuron_layer.cpp
@@ -61,7 +61,7 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   ReLULayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -87,7 +87,7 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   ReLULayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -115,7 +115,7 @@ TYPED_TEST(NeuronLayerTest, TestSigmoidGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   SigmoidLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -142,7 +142,7 @@ TYPED_TEST(NeuronLayerTest, TestSigmoidGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   SigmoidLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -172,7 +172,7 @@ TYPED_TEST(NeuronLayerTest, TestDropoutGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   DropoutLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -271,7 +271,7 @@ TYPED_TEST(NeuronLayerTest, TestBNLLGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   BNLLLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -297,7 +297,7 @@ TYPED_TEST(NeuronLayerTest, TestBNLLGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   BNLLLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
diff --git a/src/caffe/test/test_power_layer.cpp b/src/caffe/test/test_power_layer.cpp
index 2101a41..4fab8af 100644
--- a/src/caffe/test/test_power_layer.cpp
+++ b/src/caffe/test/test_power_layer.cpp
@@ -79,7 +79,7 @@ class PowerLayerTest : public ::testing::Test {
       }
     }
     GradientChecker<Dtype> checker(1e-2, 1e-2, 1701, 0., 0.01);
-    checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+    checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
         &(this->blob_top_vec_));
   }
 
diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp
index 06f0f3f..327bcf9 100644
--- a/src/caffe/test/test_split_layer.cpp
+++ b/src/caffe/test/test_split_layer.cpp
@@ -121,7 +121,7 @@ TYPED_TEST(SplitLayerTest, TestCPUGradient) {
   Caffe::set_mode(Caffe::CPU);
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -130,7 +130,7 @@ TYPED_TEST(SplitLayerTest, TestGPUGradient) {
   Caffe::set_mode(Caffe::GPU);
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -140,7 +140,7 @@ TYPED_TEST(SplitLayerTest, TestCPUGradientInPlace) {
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
   this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -150,7 +150,7 @@ TYPED_TEST(SplitLayerTest, TestGPUGradientInPlace) {
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
   this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
diff --git a/src/caffe/test/test_tanh_layer.cpp b/src/caffe/test/test_tanh_layer.cpp
index 82cb96c..9c9f8a7 100644
--- a/src/caffe/test/test_tanh_layer.cpp
+++ b/src/caffe/test/test_tanh_layer.cpp
@@ -70,7 +70,7 @@ TYPED_TEST(TanHLayerTest, TestGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   TanHLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -102,7 +102,7 @@ TYPED_TEST(TanHLayerTest, TestGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   TanHLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
-- 
2.7.4
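
Note (illustrative, not part of the patch): CheckGradientSingle estimates each
gradient entry with a central difference, f'(x) ~= (f(x + h) - f(x - h)) / (2h),
and the new eltwise mode simply skips the off-diagonal Jacobian entries, which
are zero by definition for an element-wise layer. The standalone C++ sketch
below (hypothetical example code, not from Caffe) shows that estimate for tanh,
one of the element-wise layers tested above, using the same 1e-2 step size the
tests pass as stepsize_:

  // Central-difference gradient estimate for an element-wise function,
  // mirroring what the checker does for the diagonal entry feat_id ==
  // top_data_id.
  #include <cmath>
  #include <cstdio>

  int main() {
    const double h = 1e-2;  // plays the role of the checker's stepsize_
    const double x = 0.5;
    // Element-wise function under test: y = tanh(x).
    const double positive_objective = std::tanh(x + h);
    const double negative_objective = std::tanh(x - h);
    const double estimated_gradient =
        (positive_objective - negative_objective) / (2.0 * h);
    // Analytic derivative for comparison: d/dx tanh(x) = 1 - tanh(x)^2.
    const double computed_gradient = 1.0 - std::tanh(x) * std::tanh(x);
    std::printf("estimated %.6f  computed %.6f\n",
                estimated_gradient, computed_gradient);
    return 0;
  }

With h = 1e-2 the two values agree to within about 1e-5 here, comfortably
inside the 1e-2 and 1e-3 thresholds the tests above give the checker.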