[test] forward and gradient checks for softmax ignore_label and normalize: false
author    Jonathan L Long <jonlong@cs.berkeley.edu>
          Tue, 27 Jan 2015 21:27:48 +0000 (13:27 -0800)
committer Jonathan L Long <jonlong@cs.berkeley.edu>
          Tue, 27 Jan 2015 23:56:23 +0000 (15:56 -0800)
src/caffe/test/test_softmax_with_loss_layer.cpp

index badda3b..1498d5c 100644
@@ -3,6 +3,7 @@
 #include <cstring>
 #include <vector>
 
+#include "boost/scoped_ptr.hpp"
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
@@ -13,6 +14,8 @@
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
 
+using boost::scoped_ptr;
+
 namespace caffe {
 
 template <typename TypeParam>
@@ -50,7 +53,6 @@ class SoftmaxWithLossLayerTest : public MultiDeviceTest<TypeParam> {
 
 TYPED_TEST_CASE(SoftmaxWithLossLayerTest, TestDtypesAndDevices);
 
-
 TYPED_TEST(SoftmaxWithLossLayerTest, TestGradient) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
@@ -61,4 +63,52 @@ TYPED_TEST(SoftmaxWithLossLayerTest, TestGradient) {
       this->blob_top_vec_, 0);
 }
 
+TYPED_TEST(SoftmaxWithLossLayerTest, TestForwardIgnoreLabel) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  layer_param.mutable_loss_param()->set_normalize(false);
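+  // Unnormalized loss keeps each sample's weight fixed across passes.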
+  // First, compute the loss with all labels
+  scoped_ptr<SoftmaxWithLossLayer<Dtype> > layer(
+      new SoftmaxWithLossLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  Dtype full_loss = this->blob_top_loss_->cpu_data()[0];
+  // Now, accumulate the loss, ignoring each label in {0, ..., 4} in turn.
+  Dtype accum_loss = 0;
+  for (int label = 0; label < 5; ++label) {
+    layer_param.mutable_loss_param()->set_ignore_label(label);
+    layer.reset(new SoftmaxWithLossLayer<Dtype>(layer_param));
+    layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+    layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+    accum_loss += this->blob_top_loss_->cpu_data()[0];
+  }
+  // Each sample's loss term is dropped in exactly one of the five passes,
+  // so each term should be counted four times in accum_loss.
+  EXPECT_NEAR(4 * full_loss, accum_loss, 1e-4);
+}
+
+TYPED_TEST(SoftmaxWithLossLayerTest, TestGradientIgnoreLabel) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  // labels are in {0, ..., 4}, so we'll ignore about a fifth of them
+  layer_param.mutable_loss_param()->set_ignore_label(0);
+  SoftmaxWithLossLayer<Dtype> layer(layer_param);
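+  // Numeric gradient check: stepsize 1e-2, threshold 1e-2, RNG seed 1701.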
+  GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0);
+}
+
+TYPED_TEST(SoftmaxWithLossLayerTest, TestGradientUnnormalized) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
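+  // Check gradients when the loss is left unnormalized.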
+  layer_param.mutable_loss_param()->set_normalize(false);
+  SoftmaxWithLossLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0);
+}
+
 }  // namespace caffe
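
The factor of four checked in TestForwardIgnoreLabel is a counting argument.
Assuming the unnormalized loss is, up to a constant factor, a sum of
per-sample terms \ell_i over samples with labels y_i in {0, ..., 4} (the
label set this test draws from), each term survives every pass except the
one that ignores its own label:

    \sum_{k=0}^{4} \sum_{i : y_i \neq k} \ell_i = \sum_i (5 - 1) \ell_i = 4 \sum_i \ell_i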