From: Yangqing Jia
Date: Thu, 26 Sep 2013 18:32:46 +0000 (-0700)
Subject: multinomial logistic loss
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c4db4064e5c6f4a0a38fd95bf39f1639a8eb627a;p=platform%2Fupstream%2Fcaffe.git

multinomial logistic loss
---

diff --git a/src/caffe/filler.hpp b/src/caffe/filler.hpp
index f4ca5c9..e945307 100644
--- a/src/caffe/filler.hpp
+++ b/src/caffe/filler.hpp
@@ -72,6 +72,32 @@ class GaussianFiller : public Filler<Dtype> {
   }
 };
 
+template <typename Dtype>
+class PositiveUnitballFiller : public Filler<Dtype> {
+ public:
+  explicit PositiveUnitballFiller(const FillerParameter& param)
+      : Filler<Dtype>(param) {}
+  virtual void Fill(Blob<Dtype>* blob) {
+    Dtype* data = blob->mutable_cpu_data();
+    DCHECK(blob->count());
+    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(), 0, 1);
+    // We expect the filler to not be called very frequently, so we will
+    // just use a simple implementation.
+    int dim = blob->count() / blob->num();
+    DCHECK(dim);
+    for (int i = 0; i < blob->num(); ++i) {
+      Dtype sum = 0;
+      for (int j = 0; j < dim; ++j) {
+        sum += data[i * dim + j];
+      }
+      for (int j = 0; j < dim; ++j) {
+        data[i * dim + j] /= sum;
+      }
+    }
+  }
+};
+
+
 // A function to get a specific filler from the specification given in
 // FillerParameter. Ideally this would be replaced by a factory pattern,
 // but we will leave it this way for now.
@@ -84,6 +110,8 @@ Filler<Dtype>* GetFiller(const FillerParameter& param) {
     return new UniformFiller<Dtype>(param);
   } else if (type == "gaussian") {
     return new GaussianFiller<Dtype>(param);
+  } else if (type == "positive_unitball") {
+    return new PositiveUnitballFiller<Dtype>(param);
   } else {
     CHECK(false) << "Unknown filler name: " << param.type();
   }

diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu
index 902a066..c2a5201 100644
--- a/src/caffe/layers/lrn_layer.cu
+++ b/src/caffe/layers/lrn_layer.cu
@@ -26,7 +26,7 @@ __global__ void LRNFillScale(const int nthreads, const Dtype* in,
   int post_pad = size - pre_pad - 1;
   Dtype accum_scale = 0;
   // fill the scale at [n, :, h, w]
-  // accumulate values 
+  // accumulate values
   while (head < post_pad) {
     accum_scale += in[head * step] * in[head * step];
     ++head;
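For readers skimming the diff: the new filler draws each entry uniformly from [0, 1) and then rescales each row (one row per input example) so it sums to one, i.e. each row lands on the positive face of the unit L1 ball. A minimal standalone sketch of that contract, using plain C++ arrays and illustrative sizes rather than Caffe's Blob and RNG wrappers:

// Sketch of the PositiveUnitballFiller contract above: draw uniform values
// in [0, 1), then scale each row so it sums to 1. No Caffe types involved.
#include <cstdio>
#include <cstdlib>

int main() {
  const int num = 2, dim = 5;  // "rows" play the role of blob->num()
  double data[num * dim];
  for (int k = 0; k < num * dim; ++k) {
    data[k] = rand() / (RAND_MAX + 1.0);  // uniform in [0, 1)
  }
  for (int i = 0; i < num; ++i) {
    double sum = 0;
    for (int j = 0; j < dim; ++j) sum += data[i * dim + j];
    for (int j = 0; j < dim; ++j) data[i * dim + j] /= sum;
  }
  // Verify the invariant the new test_filler.cpp test checks: row sums ~= 1.
  for (int i = 0; i < num; ++i) {
    double sum = 0;
    for (int j = 0; j < dim; ++j) sum += data[i * dim + j];
    printf("row %d sum = %.6f\n", i, sum);
  }
  return 0;
}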
diff --git a/src/caffe/layers/multinomial_logistic_loss_layer.cpp b/src/caffe/layers/multinomial_logistic_loss_layer.cpp
new file mode 100644
index 0000000..5ffa4ac
--- /dev/null
+++ b/src/caffe/layers/multinomial_logistic_loss_layer.cpp
@@ -0,0 +1,52 @@
+// Copyright 2013 Yangqing Jia
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+#include <algorithm>
+#include <cmath>
+
+using std::max;
+
+namespace caffe {
+
+template <typename Dtype>
+void MultinomialLogisticLossLayer<Dtype>::SetUp(
+    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input.";
+  CHECK_EQ(top->size(), 0) << "Loss Layer takes no blob as output.";
+  CHECK_EQ(bottom[0]->num(), bottom[1]->num())
+      << "The data and label should have the same number.";
+  CHECK_EQ(bottom[1]->channels(), 1);
+  CHECK_EQ(bottom[1]->height(), 1);
+  CHECK_EQ(bottom[1]->width(), 1);
+}
+
+
+template <typename Dtype>
+Dtype MultinomialLogisticLossLayer<Dtype>::Backward_cpu(
+    const vector<Blob<Dtype>*>& top, const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+  const Dtype* bottom_label = (*bottom)[1]->cpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+  int num = (*bottom)[0]->num();
+  int dim = (*bottom)[0]->count() / (*bottom)[0]->num();
+  memset(bottom_diff, 0, sizeof(Dtype) * (*bottom)[0]->count());
+  Dtype loss = 0;
+  const Dtype kLOG_THRESHOLD = 1e-8;
+  for (int i = 0; i < num; ++i) {
+    int label = static_cast<int>(bottom_label[i]);
+    Dtype prob = max(bottom_data[i * dim + label], kLOG_THRESHOLD);
+    loss -= log(prob);
+    bottom_diff[i * dim + label] = -1. / prob / num;
+  }
+  return loss / num;
+}
+
+// TODO: implement the GPU version.
+
+INSTANTIATE_CLASS(MultinomialLogisticLossLayer);
+
+
+}  // namespace caffe
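In equation form, with N samples, predicted distribution p, labels y, and threshold eps = 1e-8, Backward_cpu computes loss = -(1/N) * sum_i log(max(p_{i,y_i}, eps)), and the only nonzero gradient entry per sample is d(loss)/d(p_{i,y_i}) = -1/(N * p_{i,y_i}). A standalone sketch of the same arithmetic on plain arrays (the probabilities and labels are illustrative):

// Sketch of the arithmetic in Backward_cpu above, on plain arrays
// instead of Blobs.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const int num = 2, dim = 3;                      // 2 samples, 3 classes
  const double prob[num * dim] = {0.7, 0.2, 0.1,   // each row already sums
                                  0.1, 0.1, 0.8};  // to 1 (softmax output)
  const int label[num] = {0, 2};                   // ground-truth classes
  const double kLOG_THRESHOLD = 1e-8;              // guards against log(0)
  double diff[num * dim] = {0};
  double loss = 0;
  for (int i = 0; i < num; ++i) {
    double p = std::max(prob[i * dim + label[i]], kLOG_THRESHOLD);
    loss -= std::log(p);
    diff[i * dim + label[i]] = -1.0 / p / num;     // gradient at the label
  }
  printf("loss = %.4f\n", loss / num);   // -(log 0.7 + log 0.8)/2 ~ 0.2899
  printf("diff[0] = %.4f\n", diff[0]);   // -1/(0.7 * 2) ~ -0.7143
  return 0;
}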
diff --git a/src/caffe/test/test_filler.cpp b/src/caffe/test/test_filler.cpp
index 8ecb703..7738ce4 100644
--- a/src/caffe/test/test_filler.cpp
+++ b/src/caffe/test/test_filler.cpp
@@ -70,6 +70,43 @@ TYPED_TEST(UniformFillerTest, TestFill) {
 }
 
 template <typename Dtype>
+class PositiveUnitballFillerTest : public ::testing::Test {
+ protected:
+  PositiveUnitballFillerTest()
+      : blob_(new Blob<Dtype>(2, 3, 4, 5)),
+        filler_param_() {
+    filler_.reset(new PositiveUnitballFiller<Dtype>(filler_param_));
+    filler_->Fill(blob_);
+  }
+  virtual ~PositiveUnitballFillerTest() { delete blob_; }
+  Blob<Dtype>* const blob_;
+  FillerParameter filler_param_;
+  shared_ptr<PositiveUnitballFiller<Dtype> > filler_;
+};
+
+TYPED_TEST_CASE(PositiveUnitballFillerTest, Dtypes);
+
+TYPED_TEST(PositiveUnitballFillerTest, TestFill) {
+  EXPECT_TRUE(this->blob_);
+  const int num = this->blob_->num();
+  const int count = this->blob_->count();
+  const int dim = count / num;
+  const TypeParam* data = this->blob_->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_GE(data[i], 0);
+    EXPECT_LE(data[i], 1);
+  }
+  for (int i = 0; i < num; ++i) {
+    TypeParam sum = 0;
+    for (int j = 0; j < dim; ++j) {
+      sum += data[i * dim + j];
+    }
+    EXPECT_GE(sum, 0.999);
+    EXPECT_LE(sum, 1.001);
+  }
+}
+
+template <typename Dtype>
 class GaussianFillerTest : public ::testing::Test {
  protected:
   GaussianFillerTest()
@@ -96,7 +133,7 @@ TYPED_TEST(GaussianFillerTest, TestFill) {
   TypeParam var = 0.;
   for (int i = 0; i < count; ++i) {
     mean += data[i];
-    var += (data[i] - this->filler_param_.mean()) * 
+    var += (data[i] - this->filler_param_.mean()) *
         (data[i] - this->filler_param_.mean());
   }
   mean /= count;

diff --git a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp
new file mode 100644
index 0000000..de50245
--- /dev/null
+++ b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp
@@ -0,0 +1,60 @@
+// Copyright 2013 Yangqing Jia
+
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class MultinomialLogisticLossLayerTest : public ::testing::Test {
+ protected:
+  MultinomialLogisticLossLayerTest()
+      : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
+        blob_bottom_label_(new Blob<Dtype>(10, 1, 1, 1)) {
+    // fill the values
+    FillerParameter filler_param;
+    PositiveUnitballFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_data_);
+    blob_bottom_vec_.push_back(blob_bottom_data_);
+    for (int i = 0; i < blob_bottom_label_->count(); ++i) {
+      blob_bottom_label_->mutable_cpu_data()[i] = rand() % 5;
+    }
+    blob_bottom_vec_.push_back(blob_bottom_label_);
+  }
+  virtual ~MultinomialLogisticLossLayerTest() {
+    delete blob_bottom_data_;
+    delete blob_bottom_label_;
+  }
+  Blob<Dtype>* const blob_bottom_data_;
+  Blob<Dtype>* const blob_bottom_label_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(MultinomialLogisticLossLayerTest, Dtypes);
+
+
+TYPED_TEST(MultinomialLogisticLossLayerTest, TestGradientCPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  MultinomialLogisticLossLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2, 1701, 0, 0.05);
+  checker.CheckGradientSingle(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0, -1, -1);
+}
+
+}  // namespace caffe

diff --git a/src/caffe/vision_layers.hpp b/src/caffe/vision_layers.hpp
index 2c7af47..dbf278c 100644
--- a/src/caffe/vision_layers.hpp
+++ b/src/caffe/vision_layers.hpp
@@ -280,6 +280,26 @@ class SoftmaxLayer : public Layer<Dtype> {
   Blob<Dtype> scale_;
 };
 
+template <typename Dtype>
+class MultinomialLogisticLossLayer : public Layer<Dtype> {
+ public:
+  explicit MultinomialLogisticLossLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  // The loss layer will do nothing during forward - all computation is
+  // carried out in the backward pass.
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) { return; }
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) { return; }
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  // virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+  //     const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+};
 
 
 }  // namespace caffe
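The gradient test above drives the layer through a finite-difference check; the checker's arguments (1e-2, 1e-2, 1701, 0, 0.05) appear to be step size, relative tolerance, RNG seed, and kink parameters. A simplified scalar version of that comparison, assuming the -log(p)/num loss defined in this commit (this is not Caffe's GradientChecker implementation):

// Scalar sketch of a centered finite-difference gradient check for
// loss(p) = -log(p) / num, compared against the analytic gradient that
// Backward_cpu writes into bottom_diff.
#include <cmath>
#include <cstdio>

int main() {
  const int num = 1;
  const double p = 0.4;        // probability at the true label
  const double step = 1e-2;    // perturbation size, as in the test
  // Analytic gradient from Backward_cpu: -1 / p / num.
  const double analytic = -1.0 / p / num;
  // Centered finite difference: (loss(p + h) - loss(p - h)) / (2h).
  const double numeric =
      (-std::log(p + step) - (-std::log(p - step))) / (2 * step * num);
  printf("analytic %.4f vs numeric %.4f\n", analytic, numeric);  // ~ -2.5
  return 0;
}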