From: Yangqing Jia
Date: Thu, 26 Sep 2013 18:32:46 +0000 (-0700)
Subject: multinomial logistic loss
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c4db4064e5c6f4a0a38fd95bf39f1639a8eb627a;p=platform%2Fupstream%2Fcaffe.git

multinomial logistic loss
---

diff --git a/src/caffe/filler.hpp b/src/caffe/filler.hpp
index f4ca5c9..e945307 100644
--- a/src/caffe/filler.hpp
+++ b/src/caffe/filler.hpp
@@ -72,6 +72,32 @@ class GaussianFiller : public Filler<Dtype> {
   }
 };
 
+template <typename Dtype>
+class PositiveUnitballFiller : public Filler<Dtype> {
+ public:
+  explicit PositiveUnitballFiller(const FillerParameter& param)
+      : Filler<Dtype>(param) {}
+  virtual void Fill(Blob<Dtype>* blob) {
+    Dtype* data = blob->mutable_cpu_data();
+    DCHECK(blob->count());
+    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(), 0, 1);
+    // We expect the filler to not be called very frequently, so we will
+    // just use a simple implementation.
+    int dim = blob->count() / blob->num();
+    DCHECK(dim);
+    for (int i = 0; i < blob->num(); ++i) {
+      Dtype sum = 0;
+      for (int j = 0; j < dim; ++j) {
+        sum += data[i * dim + j];
+      }
+      for (int j = 0; j < dim; ++j) {
+        data[i * dim + j] /= sum;
+      }
+    }
+  }
+};
+
+
 // A function to get a specific filler from the specification given in
 // FillerParameter. Ideally this would be replaced by a factory pattern,
 // but we will leave it this way for now.
@@ -84,6 +110,8 @@ Filler<Dtype>* GetFiller(const FillerParameter& param) {
     return new UniformFiller<Dtype>(param);
   } else if (type == "gaussian") {
     return new GaussianFiller<Dtype>(param);
+  } else if (type == "positive_unitball") {
+    return new PositiveUnitballFiller<Dtype>(param);
   } else {
     CHECK(false) << "Unknown filler name: " << param.type();
   }

diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu
index 902a066..c2a5201 100644
--- a/src/caffe/layers/lrn_layer.cu
+++ b/src/caffe/layers/lrn_layer.cu
@@ -26,7 +26,7 @@ __global__ void LRNFillScale(const int nthreads, const Dtype* in,
   int post_pad = size - pre_pad - 1;
   Dtype accum_scale = 0;
   // fill the scale at [n, :, h, w]
-  // accumulate values 
+  // accumulate values
   while (head < post_pad) {
     accum_scale += in[head * step] * in[head * step];
     ++head;
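For readers skimming the diff: the new filler draws each entry uniformly from [0, 1) and then rescales each row (one row per input example) so it sums to one, i.e. each row lands on the positive face of the unit L1 ball. A minimal standalone sketch of that contract, using plain C++ arrays and illustrative sizes rather than Caffe's Blob and RNG wrappers:

// Sketch of the PositiveUnitballFiller contract above: draw uniform values
// in [0, 1), then scale each row so it sums to 1. No Caffe types involved.
#include <cstdio>
#include <cstdlib>

int main() {
  const int num = 2, dim = 5;  // "rows" play the role of blob->num()
  double data[num * dim];
  for (int k = 0; k < num * dim; ++k) {
    data[k] = rand() / (RAND_MAX + 1.0);  // uniform in [0, 1)
  }
  for (int i = 0; i < num; ++i) {
    double sum = 0;
    for (int j = 0; j < dim; ++j) sum += data[i * dim + j];
    for (int j = 0; j < dim; ++j) data[i * dim + j] /= sum;
  }
  // Verify the invariant the new test_filler.cpp test checks: row sums ~= 1.
  for (int i = 0; i < num; ++i) {
    double sum = 0;
    for (int j = 0; j < dim; ++j) sum += data[i * dim + j];
    printf("row %d sum = %.6f\n", i, sum);
  }
  return 0;
}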
diff --git a/src/caffe/layers/multinomial_logistic_loss_layer.cpp b/src/caffe/layers/multinomial_logistic_loss_layer.cpp
new file mode 100644
index 0000000..5ffa4ac
--- /dev/null
+++ b/src/caffe/layers/multinomial_logistic_loss_layer.cpp
@@ -0,0 +1,52 @@
+// Copyright 2013 Yangqing Jia
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+#include <algorithm>
+#include <cmath>
+
+using std::max;
+
+namespace caffe {
+
+template <typename Dtype>
+void MultinomialLogisticLossLayer<Dtype>::SetUp(
+    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input.";
+  CHECK_EQ(top->size(), 0) << "Loss Layer takes no blob as output.";
+  CHECK_EQ(bottom[0]->num(), bottom[1]->num())
+      << "The data and label should have the same number.";
+  CHECK_EQ(bottom[1]->channels(), 1);
+  CHECK_EQ(bottom[1]->height(), 1);
+  CHECK_EQ(bottom[1]->width(), 1);
+}
+
+
+template <typename Dtype>
+Dtype MultinomialLogisticLossLayer<Dtype>::Backward_cpu(
+    const vector<Blob<Dtype>*>& top, const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+  const Dtype* bottom_label = (*bottom)[1]->cpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+  int num = (*bottom)[0]->num();
+  int dim = (*bottom)[0]->count() / (*bottom)[0]->num();
+  memset(bottom_diff, 0, sizeof(Dtype) * (*bottom)[0]->count());
+  Dtype loss = 0;
+  const Dtype kLOG_THRESHOLD = 1e-8;
+  for (int i = 0; i < num; ++i) {
+    int label = static_cast<int>(bottom_label[i]);
+    Dtype prob = max(bottom_data[i * dim + label], kLOG_THRESHOLD);
+    loss -= log(prob);
+    bottom_diff[i * dim + label] = -1. / prob / num;
+  }
+  return loss / num;
+}
+
+// TODO: implement the GPU version.
+
+INSTANTIATE_CLASS(MultinomialLogisticLossLayer);
+
+
+}  // namespace caffe
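In equation form, with N samples, predicted distribution p, labels y, and threshold eps = 1e-8, Backward_cpu computes loss = -(1/N) * sum_i log(max(p_{i,y_i}, eps)), and the only nonzero gradient entry per sample is d(loss)/d(p_{i,y_i}) = -1/(N * p_{i,y_i}). A standalone sketch of the same arithmetic on plain arrays (the probabilities and labels are illustrative):

// Sketch of the arithmetic in Backward_cpu above, on plain arrays
// instead of Blobs.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const int num = 2, dim = 3;                      // 2 samples, 3 classes
  const double prob[num * dim] = {0.7, 0.2, 0.1,   // each row already sums
                                  0.1, 0.1, 0.8};  // to 1 (softmax output)
  const int label[num] = {0, 2};                   // ground-truth classes
  const double kLOG_THRESHOLD = 1e-8;              // guards against log(0)
  double diff[num * dim] = {0};
  double loss = 0;
  for (int i = 0; i < num; ++i) {
    double p = std::max(prob[i * dim + label[i]], kLOG_THRESHOLD);
    loss -= std::log(p);
    diff[i * dim + label[i]] = -1.0 / p / num;     // gradient at the label
  }
  printf("loss = %.4f\n", loss / num);   // -(log 0.7 + log 0.8)/2 ~ 0.2899
  printf("diff[0] = %.4f\n", diff[0]);   // -1/(0.7 * 2) ~ -0.7143
  return 0;
}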
diff --git a/src/caffe/test/test_filler.cpp b/src/caffe/test/test_filler.cpp
index 8ecb703..7738ce4 100644
--- a/src/caffe/test/test_filler.cpp
+++ b/src/caffe/test/test_filler.cpp
@@ -70,6 +70,43 @@ TYPED_TEST(UniformFillerTest, TestFill) {
 }
 
 template <typename Dtype>
+class PositiveUnitballFillerTest : public ::testing::Test {
+ protected:
+  PositiveUnitballFillerTest()
+      : blob_(new Blob<Dtype>(2, 3, 4, 5)),
+        filler_param_() {
+    filler_.reset(new PositiveUnitballFiller<Dtype>(filler_param_));
+    filler_->Fill(blob_);
+  }
+  virtual ~PositiveUnitballFillerTest() { delete blob_; }
+  Blob<Dtype>* const blob_;
+  FillerParameter filler_param_;
+  shared_ptr<PositiveUnitballFiller<Dtype> > filler_;
+};
+
+TYPED_TEST_CASE(PositiveUnitballFillerTest, Dtypes);
+
+TYPED_TEST(PositiveUnitballFillerTest, TestFill) {
+  EXPECT_TRUE(this->blob_);
+  const int num = this->blob_->num();
+  const int count = this->blob_->count();
+  const int dim = count / num;
+  const TypeParam* data = this->blob_->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_GE(data[i], 0);
+    EXPECT_LE(data[i], 1);
+  }
+  for (int i = 0; i < num; ++i) {
+    TypeParam sum = 0;
+    for (int j = 0; j < dim; ++j) {
+      sum += data[i * dim + j];
+    }
+    EXPECT_GE(sum, 0.999);
+    EXPECT_LE(sum, 1.001);
+  }
+}
+
+template <typename Dtype>
 class GaussianFillerTest : public ::testing::Test {
  protected:
   GaussianFillerTest()
@@ -96,7 +133,7 @@ TYPED_TEST(GaussianFillerTest, TestFill) {
   TypeParam var = 0.;
   for (int i = 0; i < count; ++i) {
     mean += data[i];
-    var += (data[i] - this->filler_param_.mean()) * 
+    var += (data[i] - this->filler_param_.mean()) *
         (data[i] - this->filler_param_.mean());
   }
   mean /= count;

diff --git a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp
new file mode 100644
index 0000000..de50245
--- /dev/null
+++ b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp
@@ -0,0 +1,60 @@
+// Copyright 2013 Yangqing Jia
+
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class MultinomialLogisticLossLayerTest : public ::testing::Test {
+ protected:
+  MultinomialLogisticLossLayerTest()
+      : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
+        blob_bottom_label_(new Blob<Dtype>(10, 1, 1, 1)) {
+    // fill the values
+    FillerParameter filler_param;
+    PositiveUnitballFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_data_);
+    blob_bottom_vec_.push_back(blob_bottom_data_);
+    for (int i = 0; i < blob_bottom_label_->count(); ++i) {
+      blob_bottom_label_->mutable_cpu_data()[i] = rand() % 5;
+    }
+    blob_bottom_vec_.push_back(blob_bottom_label_);
+  }
+  virtual ~MultinomialLogisticLossLayerTest() {
+    delete blob_bottom_data_;
+    delete blob_bottom_label_;
+  }
+  Blob<Dtype>* const blob_bottom_data_;
+  Blob<Dtype>* const blob_bottom_label_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(MultinomialLogisticLossLayerTest, Dtypes);
+
+
+TYPED_TEST(MultinomialLogisticLossLayerTest, TestGradientCPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  MultinomialLogisticLossLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2, 1701, 0, 0.05);
+  checker.CheckGradientSingle(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0, -1, -1);
+}
+
+}  // namespace caffe

diff --git a/src/caffe/vision_layers.hpp b/src/caffe/vision_layers.hpp
index 2c7af47..dbf278c 100644
--- a/src/caffe/vision_layers.hpp
+++ b/src/caffe/vision_layers.hpp
@@ -280,6 +280,26 @@ class SoftmaxLayer : public Layer<Dtype> {
   Blob<Dtype> scale_;
 };
 
+template <typename Dtype>
+class MultinomialLogisticLossLayer : public Layer<Dtype> {
+ public:
+  explicit MultinomialLogisticLossLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  // The loss layer will do nothing during forward - all computation is
+  // carried out in the backward pass.
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) { return; }
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) { return; }
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  // virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+  //     const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+};
 
 
 }  // namespace caffe
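The gradient test above drives the layer through a finite-difference check; the checker's arguments (1e-2, 1e-2, 1701, 0, 0.05) appear to be step size, relative tolerance, RNG seed, and kink parameters. A simplified scalar version of that comparison, assuming the -log(p)/num loss defined in this commit (this is not Caffe's GradientChecker implementation):

// Scalar sketch of a centered finite-difference gradient check for
// loss(p) = -log(p) / num, compared against the analytic gradient that
// Backward_cpu writes into bottom_diff.
#include <cmath>
#include <cstdio>

int main() {
  const int num = 1;
  const double p = 0.4;        // probability at the true label
  const double step = 1e-2;    // perturbation size, as in the test
  // Analytic gradient from Backward_cpu: -1 / p / num.
  const double analytic = -1.0 / p / num;
  // Centered finite difference: (loss(p + h) - loss(p - h)) / (2h).
  const double numeric =
      (-std::log(p + step) - (-std::log(p - step))) / (2 * step * num);
  printf("analytic %.4f vs numeric %.4f\n", analytic, numeric);  // ~ -2.5
  return 0;
}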