// Copyright 2013 Yangqing Jia
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstring>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"

using std::max;

namespace caffe {

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input.";
  CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output.";
  // Wire up the internal softmax layer so that prob_ holds the softmax
  // output of bottom[0].
  softmax_bottom_vec_.clear();
  softmax_bottom_vec_.push_back(bottom[0]);
  softmax_top_vec_.push_back(&prob_);
  softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_);
}
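
// Note on the layer's interface: SetUp enforces top->size() == 0, so this
// layer writes no output blob. Forward_cpu returns the scalar loss directly,
// and the softmax output cached in prob_ is reused by Backward_cpu.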

template <typename Dtype>
Dtype SoftmaxWithLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  // The forward pass computes the softmax prob values.
  softmax_bottom_vec_[0] = bottom[0];
  softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  int num = prob_.num();
  int dim = prob_.count() / num;
  Dtype loss = 0;
  for (int i = 0; i < num; ++i) {
    // Accumulate -log(p) of the probability assigned to the true class,
    // clamped away from zero so log() stays finite.
    loss += -log(max(prob_data[i * dim + static_cast<int>(label[i])],
                     Dtype(FLT_MIN)));
  }
  // Report the loss averaged over the mini-batch.
  return loss / num;
}
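
// For a softmax followed by the multinomial logistic loss above, the
// gradient with respect to the softmax input z has the closed form
//
//   d(loss) / d(z[i][k]) = prob[i][k] - (k == label[i] ? 1 : 0),
//
// averaged over the num examples. Backward_cpu therefore copies the cached
// probabilities, subtracts 1 at each example's true class, and rescales by
// 1 / num.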

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  // First, copy the cached softmax probabilities into the bottom diff.
  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
  const Dtype* prob_data = prob_.cpu_data();
  memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count());
  const Dtype* label = (*bottom)[1]->cpu_data();
  int num = prob_.num();
  int dim = prob_.count() / num;
  // Then subtract 1 at the true-class entry of every example.
  for (int i = 0; i < num; ++i) {
    bottom_diff[i * dim + static_cast<int>(label[i])] -= 1;
  }
  // Scale down gradient by 1/num to average over the mini-batch.
  caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff);
}
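
// caffe_scal (declared in caffe/util/math_functions.hpp) scales the entire
// diff buffer in place; it is presumably a thin wrapper over the BLAS *scal
// routines, so the averaging costs a single pass over the gradient.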

// Explicitly instantiate the layer for the supported Dtype values.
INSTANTIATE_CLASS(SoftmaxWithLossLayer);

}  // namespace caffe