add forward tests (via reference impl) for SigmoidCrossEntropyLossLayer
author Jeff Donahue <jeff.donahue@gmail.com>
Wed, 23 Apr 2014 06:12:14 +0000 (23:12 -0700)
committer Jeff Donahue <jeff.donahue@gmail.com>
Wed, 23 Apr 2014 06:35:37 +0000 (23:35 -0700)
src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
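
Note on the math: the "stable version of loss computation" used in both
Forward_cpu and Forward_gpu is the standard overflow-safe rewrite of sigmoid
cross-entropy. With logit x, target t in [0, 1], and indicator s = (x >= 0),
the per-element loss

    -(x * (t - s) - log(1 + exp(x - 2 * x * s)))
        = max(x, 0) - x * t + log(1 + exp(-|x|))

equals -(t * log(sigmoid(x)) + (1 - t) * log(1 - sigmoid(x))) without ever
calling exp() on a positive argument. The new test checks this against a naive
reference that goes through sigmoid() directly; the data filler's std drops
from 10 to 1, presumably so the naive reference does not saturate sigmoid() to
exactly 0 or 1 and produce log(0) = -inf.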

diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
index f2186d6..767601c 100644
--- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
+++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
@@ -19,6 +19,10 @@ void SigmoidCrossEntropyLossLayer<Dtype>::SetUp(
       "SigmoidCrossEntropyLoss Layer takes two blobs as input.";
   CHECK_EQ(top->size(), 0) <<
       "SigmoidCrossEntropyLoss Layer takes no blob as output.";
+  CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
+      "SigmoidCrossEntropyLoss Layer inputs must have same count.";
+  CHECK_EQ(bottom[0]->num(), bottom[1]->num()) <<
+      "SigmoidCrossEntropyLoss Layer inputs must have same num.";
   sigmoid_bottom_vec_.clear();
   sigmoid_bottom_vec_.push_back(bottom[0]);
   sigmoid_top_vec_.clear();
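
The two new CHECKs require the bottoms to agree in total count and in num
(batch size); the forward and backward passes below treat both blobs as flat
arrays of count elements and normalize by num, so these are exactly the shape
assumptions the layer relies on.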
@@ -33,14 +37,14 @@ Dtype SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
   sigmoid_bottom_vec_[0] = bottom[0];
   sigmoid_layer_->Forward(sigmoid_bottom_vec_, &sigmoid_top_vec_);
   // Compute the loss (negative log likelihood)
-  int count = bottom[0]->count();
-  int num = bottom[0]->num();
+  const int count = bottom[0]->count();
+  const int num = bottom[0]->num();
   // Stable version of loss computation from input data
   const Dtype* input_data = bottom[0]->cpu_data();
-  const Dtype* ground_truth = bottom[1]->cpu_data();
+  const Dtype* target = bottom[1]->cpu_data();
   Dtype loss = 0;
   for (int i = 0; i < count; ++i) {
-    loss -= input_data[i] * (ground_truth[i] - (input_data[i] >= 0)) -
+    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
         log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
   }
   return loss / num;
@@ -51,12 +55,12 @@ void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
     const vector<Blob<Dtype>*>& top, const bool propagate_down,
     vector<Blob<Dtype>*>* bottom) {
   // First, compute the diff
-  int count = (*bottom)[0]->count();
-  int num = (*bottom)[0]->num();
+  const int count = (*bottom)[0]->count();
+  const int num = (*bottom)[0]->num();
   const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
-  const Dtype* ground_truth = (*bottom)[1]->cpu_data();
+  const Dtype* target = (*bottom)[1]->cpu_data();
   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
-  caffe_sub(count, sigmoid_output_data, ground_truth, bottom_diff);
+  caffe_sub(count, sigmoid_output_data, target, bottom_diff);
   // Scale down gradient
   caffe_scal(count, Dtype(1) / num, bottom_diff);
 }
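
For reference, the backward pass relies on the standard gradient identity for
this loss: with p = sigmoid(x), d/dx [-t * log(p) - (1 - t) * log(1 - p)] =
p - t, so the diff is simply the sigmoid output minus the target (caffe_sub
above), scaled by 1 / num to match the forward normalization.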
diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
index 64bc476..6100454 100644
--- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
+++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
@@ -19,14 +19,14 @@ Dtype SigmoidCrossEntropyLossLayer<Dtype>::Forward_gpu(
   sigmoid_bottom_vec_[0] = bottom[0];
   sigmoid_layer_->Forward(sigmoid_bottom_vec_, &sigmoid_top_vec_);
   // Compute the loss (negative log likelihood)
-  int count = bottom[0]->count();
-  int num = bottom[0]->num();
+  const int count = bottom[0]->count();
+  const int num = bottom[0]->num();
   // Stable version of loss computation from input data
   const Dtype* input_data = bottom[0]->cpu_data();
-  const Dtype* ground_truth = bottom[1]->cpu_data();
+  const Dtype* target = bottom[1]->cpu_data();
   Dtype loss = 0;
   for (int i = 0; i < count; ++i) {
-    loss -= input_data[i] * (ground_truth[i] - (input_data[i] >= 0)) -
+    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
         log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
   }
   return loss / num;
@@ -37,13 +37,13 @@ void SigmoidCrossEntropyLossLayer<Dtype>::Backward_gpu(
     const vector<Blob<Dtype>*>& top, const bool propagate_down,
     vector<Blob<Dtype>*>* bottom) {
   // First, compute the diff
-  int count = (*bottom)[0]->count();
-  int num = (*bottom)[0]->num();
+  const int count = (*bottom)[0]->count();
+  const int num = (*bottom)[0]->num();
   const Dtype* sigmoid_output_data = sigmoid_output_->gpu_data();
-  const Dtype* ground_truth = (*bottom)[1]->gpu_data();
+  const Dtype* target = (*bottom)[1]->gpu_data();
   Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
   caffe_gpu_copy(count, sigmoid_output_data, bottom_diff);
-  caffe_gpu_axpy(count, Dtype(-1), ground_truth, bottom_diff);
+  caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff);
   // Scale down gradient
   caffe_gpu_scal(count, Dtype(1) / num, bottom_diff);
 }
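
Note that as written, Forward_gpu runs the sigmoid in GPU mode but then reads
bottom[0]->cpu_data() and accumulates the loss in a host-side loop, so the
forward reduction is identical to the CPU path; only Backward_gpu stays on the
device, with caffe_gpu_copy / caffe_gpu_axpy standing in for caffe_sub.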
diff --git a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
index fe899d4..d8018be 100644
--- a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
+++ b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
@@ -26,14 +26,14 @@ class SigmoidCrossEntropyLossLayerTest : public ::testing::Test {
         blob_bottom_targets_(new Blob<Dtype>(10, 5, 1, 1)) {
     // Fill the data vector
     FillerParameter data_filler_param;
-    data_filler_param.set_std(10);
+    data_filler_param.set_std(1);
     GaussianFiller<Dtype> data_filler(data_filler_param);
     data_filler.Fill(blob_bottom_data_);
     blob_bottom_vec_.push_back(blob_bottom_data_);
     // Fill the targets vector
     FillerParameter targets_filler_param;
-    targets_filler_param.set_min(0.0);
-    targets_filler_param.set_max(1.0);
+    targets_filler_param.set_min(0);
+    targets_filler_param.set_max(1);
     UniformFiller<Dtype> targets_filler(targets_filler_param);
     targets_filler.Fill(blob_bottom_targets_);
     blob_bottom_vec_.push_back(blob_bottom_targets_);
@@ -42,6 +42,54 @@ class SigmoidCrossEntropyLossLayerTest : public ::testing::Test {
     delete blob_bottom_data_;
     delete blob_bottom_targets_;
   }
+
+  Dtype SigmoidCrossEntropyLossReference(const int count, const int num,
+                                         const Dtype* input,
+                                         const Dtype* target) {
+    Dtype loss = 0;
+    for (int i = 0; i < count; ++i) {
+      const Dtype prediction = 1 / (1 + exp(-input[i]));
+      EXPECT_LE(prediction, 1);
+      EXPECT_GE(prediction, 0);
+      EXPECT_LE(target[i], 1);
+      EXPECT_GE(target[i], 0);
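+      // The indicator terms below guard against log(0): whenever a log term's
+      // coefficient is exactly zero (target == 0 or target == 1), the
+      // indicator shifts that log's argument away from zero, so the term
+      // contributes 0 instead of 0 * log(0) = NaN.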
+      loss -= target[i] * log(prediction + (target[i] == Dtype(0)));
+      loss -= (1 - target[i]) * log(1 - prediction + (target[i] == Dtype(1)));
+    }
+    return loss / num;
+  }
+
+  void TestForward() {
+    LayerParameter layer_param;
+    FillerParameter data_filler_param;
+    data_filler_param.set_std(1);
+    GaussianFiller<Dtype> data_filler(data_filler_param);
+    FillerParameter targets_filler_param;
+    targets_filler_param.set_min(0.0);
+    targets_filler_param.set_max(1.0);
+    UniformFiller<Dtype> targets_filler(targets_filler_param);
+    Dtype eps = 2e-2;
+    for (int i = 0; i < 100; ++i) {
+      // Fill the data vector
+      data_filler.Fill(this->blob_bottom_data_);
+      // Fill the targets vector
+      targets_filler.Fill(this->blob_bottom_targets_);
+      SigmoidCrossEntropyLossLayer<Dtype> layer(layer_param);
+      layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+      Dtype layer_loss =
+          layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+      const int count = this->blob_bottom_data_->count();
+      const int num = this->blob_bottom_data_->num();
+      const Dtype* blob_bottom_data = this->blob_bottom_data_->cpu_data();
+      const Dtype* blob_bottom_targets =
+          this->blob_bottom_targets_->cpu_data();
+      Dtype reference_loss = this->SigmoidCrossEntropyLossReference(
+          count, num, blob_bottom_data, blob_bottom_targets);
+      EXPECT_NEAR(reference_loss, layer_loss, eps) << "debug: trial #" << i;
+    }
+  }
+
   Blob<Dtype>* const blob_bottom_data_;
   Blob<Dtype>* const blob_bottom_targets_;
   vector<Blob<Dtype>*> blob_bottom_vec_;
@@ -52,6 +100,16 @@ typedef ::testing::Types<float, double> Dtypes;
 TYPED_TEST_CASE(SigmoidCrossEntropyLossLayerTest, Dtypes);
 
 
+TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestSigmoidCrossEntropyLossCPU) {
+  Caffe::set_mode(Caffe::CPU);
+  this->TestForward();
+}
+
+TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestSigmoidCrossEntropyLossGPU) {
+  Caffe::set_mode(Caffe::GPU);
+  this->TestForward();
+}
+
 TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestGradientCPU) {
   LayerParameter layer_param;
   Caffe::set_mode(Caffe::CPU);