namespace caffeine {
// This hunk renames CheckGradient to CheckGradientSingle and threads two new
// selector arguments (top_id, top_data_id) through to GetObjAndGradient, so a
// single run can check the gradient of ONE output element instead of only the
// summed objective. Unmarked lines are shared diff context.
template <typename Dtype>
-void GradientChecker<Dtype>::CheckGradient(Layer<Dtype>& layer,
+void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>& layer,
vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
- int check_bottom) {
- layer.SetUp(bottom, &top);
+ int check_bottom, int top_id, int top_data_id) {
// SetUp is removed here; callers (CheckGradient / CheckGradientExhaustive)
// are now responsible for calling layer.SetUp once before the per-element loop.
// First, figure out what blobs we need to check against.
vector<Blob<Dtype>*> blobs_to_check;
// NOTE(review): this loop iterates over layer.params() yet pushes
// bottom[check_bottom] — interior lines of this excerpt appear elided
// (presumably the params push and a check_bottom < 0 branch). Confirm
// against the full file before relying on this hunk.
for (int i = 0; i < layer.params().size(); ++i) {
CHECK(check_bottom < bottom.size());
blobs_to_check.push_back(bottom[check_bottom]);
}
- // go through the blobs
+ // go through the bottom and parameter blobs
//LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
for (int blobid = 0; blobid < blobs_to_check.size(); ++blobid) {
Blob<Dtype>* current_blob = blobs_to_check[blobid];
// First, obtain the original data
// Re-seeding before every Forward keeps stochastic layers (e.g. dropout)
// on the same random draw for all three evaluations below — presumably the
// reason the seed is fixed; verify seed_ semantics in the class definition.
Caffeine::set_random_seed(seed_);
layer.Forward(bottom, &top);
- Dtype computed_objective = GetObjAndGradient(top);
+ Dtype computed_objective = GetObjAndGradient(top, top_id, top_data_id);
// Get any additional loss from the layer
computed_objective += layer.Backward(top, true, &bottom);
// NOTE(review): feat_id is used but not declared in this view — the
// per-feature loop enclosing these lines has been elided from the excerpt.
Dtype computed_gradient = current_blob->cpu_diff()[feat_id];
// Central-difference probe: evaluate at +stepsize_ ...
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
Caffeine::set_random_seed(seed_);
layer.Forward(bottom, &top);
- Dtype positive_objective = GetObjAndGradient(top);
+ Dtype positive_objective = GetObjAndGradient(top, top_id, top_data_id);
positive_objective += layer.Backward(top, true, &bottom);
// compute score by subtracting stepsize
// ... then at -stepsize_ (the -= 2*stepsize_ moves from +step to -step).
current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
Caffeine::set_random_seed(seed_);
layer.Forward(bottom, &top);
- Dtype negative_objective = GetObjAndGradient(top);
+ Dtype negative_objective = GetObjAndGradient(top, top_id, top_data_id);
negative_objective += layer.Backward(top, true, &bottom);
// Recover stepsize
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
// (The comparison of computed_gradient against
// (positive_objective - negative_objective) / (2 * stepsize_) is in
// elided lines — TODO confirm in the full file.)
}
// New driver: sets the layer up once, then checks the gradient with respect
// to EVERY element of every top blob by delegating to CheckGradientSingle
// with (top_id=i, top_data_id=j). Note this hunk simultaneously removes the
// body of the OLD single-argument GetObjAndGradient ('-' lines below); its
// replacement appears as a separate addition further down.
template <typename Dtype>
-Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>& top) {
- Dtype loss = 0;
+void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>& layer,
+ vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top, int check_bottom) {
+ layer.SetUp(bottom, &top);
+ //LOG(ERROR) << "Exhaustive Mode.";
for (int i = 0; i < top.size(); ++i) {
- Blob<Dtype>* top_blob = top[i];
- const Dtype* top_blob_data = top_blob->cpu_data();
- Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
- int count = top_blob->count();
- for (int j = 0; j < count; ++j) {
- loss += top_blob_data[j] * top_blob_data[j];
+ //LOG(ERROR) << "Exhaustive: blob " << i << " size " << top[i]->count();
+ for (int j = 0; j < top[i]->count(); ++j) {
+ //LOG(ERROR) << "Exhaustive: blob " << i << " data " << j;
+ // One full Forward/Backward sweep per output element — deliberately
+ // expensive; intended for tests only.
+ CheckGradientSingle(layer, bottom, top, check_bottom, i, j);
+ }
+ }
+}
+
+// Replacement GetObjAndGradient with output-element selectors.
+// top_id < 0 : objective L = 0.5 * sum(y^2) over all top blobs, so the
+//              seeded top diff is dL/dy = y (copied from the data).
+// top_id >= 0: objective is the single element top[top_id][top_data_id];
+//              all top diffs are zeroed and that one entry is set to 1.
+// Returns the objective value; side effect: overwrites every top blob's diff.
+template <typename Dtype>
+Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>& top,
+ int top_id, int top_data_id) {
+ Dtype loss = 0;
+ if (top_id < 0) {
+ // the loss will be half of the sum of squares of all outputs
+ for (int i = 0; i < top.size(); ++i) {
+ Blob<Dtype>* top_blob = top[i];
+ const Dtype* top_blob_data = top_blob->cpu_data();
+ Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
+ int count = top_blob->count();
+ for (int j = 0; j < count; ++j) {
+ loss += top_blob_data[j] * top_blob_data[j];
+ }
+ // set the diff: simply the data.
+ memcpy(top_blob_diff, top_blob_data, sizeof(Dtype) * top_blob->count());
+ }
+ loss /= 2.;
+ } else {
+ // the loss will be the top_data_id-th element in the top_id-th blob.
+ for (int i = 0; i < top.size(); ++i) {
+ Blob<Dtype>* top_blob = top[i];
+ Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
+ memset(top_blob_diff, 0, sizeof(Dtype) * top_blob->count());
}
- // set the diff: simply the data.
- memcpy(top_blob_diff, top_blob_data, sizeof(Dtype) * count);
+ // One-hot diff selects the gradient of exactly this output element.
+ loss = top[top_id]->cpu_data()[top_data_id];
+ top[top_id]->mutable_cpu_diff()[top_data_id] = 1.;
}
- loss /= 2.;
return loss;
}
// Note that after the gradient check, we do not guarantee that the data
// stored in the layer parameters and the blobs are unchanged.
// Header-side hunk (class interior; the class opener is outside this view).
// CheckGradient changes from an out-of-line declaration to an inline wrapper
// that forwards to CheckGradientSingle with top_id = top_data_id = -1, i.e.
// the summed 0.5*||top||^2 objective — backward compatible for existing callers.
void CheckGradient(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>& top, int check_bottom = -1);
+ vector<Blob<Dtype>*>& top, int check_bottom = -1) {
+ layer.SetUp(bottom, &top);
+ CheckGradientSingle(layer, bottom, top, check_bottom, -1, -1);
+ }
+ // Checks the gradient for every element of every top blob.
+ void CheckGradientExhaustive(Layer<Dtype>& layer,
+ vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
+ int check_bottom = -1);
+
+ // Checks the gradient of a single output element; callers must have run
+ // layer.SetUp first. top_id/top_data_id < 0 selects the summed objective.
+ void CheckGradientSingle(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>& top, int check_bottom, int top_id,
+ int top_data_id);
protected:
- Dtype GetObjAndGradient(vector<Blob<Dtype>*>& top);
+ // Defaults keep the old single-argument call sites compiling unchanged.
+ Dtype GetObjAndGradient(vector<Blob<Dtype>*>& top, int top_id = -1,
+ int top_data_id = -1);
// stepsize_: finite-difference perturbation applied to each checked value.
Dtype stepsize_;
// threshold_: presumably the allowed gradient mismatch tolerance — its use
// is not visible in this excerpt; confirm in the full file.
Dtype threshold_;
// seed_: RNG seed re-applied before every Forward during checking.
unsigned int seed_;
// Test-body fragments (the enclosing TYPED_TEST headers are outside this
// view). Each hunk swaps the old summed-objective CheckGradient call for the
// stricter per-output-element CheckGradientExhaustive.
Caffeine::set_mode(Caffeine::CPU);
ReLULayer<TypeParam> layer(layer_param);
// Checker args appear to be (stepsize, threshold, seed, ...) — the trailing
// two constructor parameters are not visible here; confirm in the full file.
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
- checker.CheckGradient(layer, this->blob_bottom_vec_, this->blob_top_vec_);
+ checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
}
Caffeine::set_mode(Caffeine::GPU);
ReLULayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
- checker.CheckGradient(layer, this->blob_bottom_vec_, this->blob_top_vec_);
+ checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
}
Caffeine::set_mode(Caffeine::CPU);
// Dropout relies on the checker's fixed seed (set_random_seed before each
// Forward) to see identical masks across the finite-difference evaluations.
DropoutLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
- checker.CheckGradient(layer, this->blob_bottom_vec_, this->blob_top_vec_);
+ checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
}
Caffeine::set_mode(Caffeine::GPU);
DropoutLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
- checker.CheckGradient(layer, this->blob_bottom_vec_, this->blob_top_vec_);
+ checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
} else {
LOG(ERROR) << "Skipping test to spare my laptop.";
}