"SigmoidCrossEntropyLoss Layer takes two blobs as input.";
CHECK_EQ(top->size(), 0) <<
"SigmoidCrossEntropyLoss Layer takes no blob as output.";
+ CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
+ "SigmoidCrossEntropyLoss Layer inputs must have same count.";
+ CHECK_EQ(bottom[0]->num(), bottom[1]->num()) <<
+ "SigmoidCrossEntropyLoss Layer inputs must have same num.";
sigmoid_bottom_vec_[0] = bottom[0];
sigmoid_layer_->Forward(sigmoid_bottom_vec_, &sigmoid_top_vec_);
// Compute the loss (negative log likelihood)
- int count = bottom[0]->count();
- int num = bottom[0]->num();
+ const int count = bottom[0]->count();
+ const int num = bottom[0]->num();
// Stable version of loss computation from input data
const Dtype* input_data = bottom[0]->cpu_data();
- const Dtype* ground_truth = bottom[1]->cpu_data();
+ const Dtype* target = bottom[1]->cpu_data();
Dtype loss = 0;
for (int i = 0; i < count; ++i) {
- loss -= input_data[i] * (ground_truth[i] - (input_data[i] >= 0)) -
+ loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
return loss / num;
const vector<Blob<Dtype>*>& top, const bool propagate_down,
vector<Blob<Dtype>*>* bottom) {
// First, compute the diff
- int count = (*bottom)[0]->count();
- int num = (*bottom)[0]->num();
+ const int count = (*bottom)[0]->count();
+ const int num = (*bottom)[0]->num();
const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
- const Dtype* ground_truth = (*bottom)[1]->cpu_data();
+ const Dtype* target = (*bottom)[1]->cpu_data();
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- caffe_sub(count, sigmoid_output_data, ground_truth, bottom_diff);
+ caffe_sub(count, sigmoid_output_data, target, bottom_diff);
// Scale down gradient
caffe_scal(count, Dtype(1) / num, bottom_diff);
sigmoid_bottom_vec_[0] = bottom[0];
sigmoid_layer_->Forward(sigmoid_bottom_vec_, &sigmoid_top_vec_);
// Compute the loss (negative log likelihood)
- int count = bottom[0]->count();
- int num = bottom[0]->num();
+ const int count = bottom[0]->count();
+ const int num = bottom[0]->num();
// Stable version of loss computation from input data
const Dtype* input_data = bottom[0]->cpu_data();
- const Dtype* ground_truth = bottom[1]->cpu_data();
+ const Dtype* target = bottom[1]->cpu_data();
Dtype loss = 0;
for (int i = 0; i < count; ++i) {
- loss -= input_data[i] * (ground_truth[i] - (input_data[i] >= 0)) -
+ loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
return loss / num;
const vector<Blob<Dtype>*>& top, const bool propagate_down,
vector<Blob<Dtype>*>* bottom) {
// First, compute the diff
- int count = (*bottom)[0]->count();
- int num = (*bottom)[0]->num();
+ const int count = (*bottom)[0]->count();
+ const int num = (*bottom)[0]->num();
const Dtype* sigmoid_output_data = sigmoid_output_->gpu_data();
- const Dtype* ground_truth = (*bottom)[1]->gpu_data();
+ const Dtype* target = (*bottom)[1]->gpu_data();
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
caffe_gpu_copy(count, sigmoid_output_data, bottom_diff);
- caffe_gpu_axpy(count, Dtype(-1), ground_truth, bottom_diff);
+ caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff);
// Scale down gradient
caffe_gpu_scal(count, Dtype(1) / num, bottom_diff);
blob_bottom_targets_(new Blob<Dtype>(10, 5, 1, 1)) {
// Fill the data vector
FillerParameter data_filler_param;
- data_filler_param.set_std(10);
+ data_filler_param.set_std(1);
GaussianFiller<Dtype> data_filler(data_filler_param);
// Fill the targets vector
FillerParameter targets_filler_param;
- targets_filler_param.set_min(0.0);
- targets_filler_param.set_max(1.0);
+ targets_filler_param.set_min(0);
+ targets_filler_param.set_max(1);
UniformFiller<Dtype> targets_filler(targets_filler_param);
delete blob_bottom_data_;
delete blob_bottom_targets_;
+ // Reference implementation of the sigmoid cross-entropy loss, written
+ // directly from the definition so it can be compared against the layer's
+ // output in tests.
+ //
+ // count:  number of elements read from both input and target.
+ // num:    divisor applied to the summed loss before returning.
+ // input:  raw (pre-sigmoid) values; at least count elements are read.
+ // target: target values, each checked to lie in [0, 1]; at least count
+ //         elements are read.
+ //
+ // Returns the sum over all elements of
+ //   -(target * log(p) + (1 - target) * log(1 - p)),  p = 1 / (1 + exp(-input)),
+ // divided by num.
+ Dtype SigmoidCrossEntropyLossReference(const int count, const int num,
+ const Dtype* input,
+ const Dtype* target) {
+ Dtype loss = 0;
+ for (int i = 0; i < count; ++i) {
+ // Plain (non-stabilized) sigmoid of the raw input.
+ const Dtype prediction = 1 / (1 + exp(-input[i]));
+ // Sanity checks: both the prediction and the target must lie in [0, 1].
+ EXPECT_LE(prediction, 1);
+ EXPECT_GE(prediction, 0);
+ EXPECT_LE(target[i], 1);
+ EXPECT_GE(target[i], 0);
+ // The comparison adds 1 to the log argument only when target[i] is exactly
+ // 0. In that case the whole term is multiplied by 0 anyway, so the offset
+ // just keeps log() away from a possible 0 argument (0 * log(0) would be NaN).
+ loss -= target[i] * log(prediction + (target[i] == Dtype(0)));
+ // Same guard for the complementary term, active when target[i] is exactly 1
+ // (where the factor 1 - target[i] is 0).
+ loss -= (1 - target[i]) * log(1 - prediction + (target[i] == Dtype(1)));
+ }
+ return loss / num;
+ }
+ // Checks the layer's forward loss against SigmoidCrossEntropyLossReference.
+ //
+ // Runs 100 trials. Each trial refills the data blob from a Gaussian filler
+ // (std 1) and the targets blob from a uniform filler on [0, 1], sets up a
+ // fresh SigmoidCrossEntropyLossLayer, runs Forward, and expects the returned
+ // loss to be within eps of the reference value computed from the same blobs.
+ // The CPU/GPU mode is not chosen here; the calling tests set it beforehand.
+ void TestForward() {
+ LayerParameter layer_param;
+ FillerParameter data_filler_param;
+ data_filler_param.set_std(1);
+ GaussianFiller<Dtype> data_filler(data_filler_param);
+ FillerParameter targets_filler_param;
+ targets_filler_param.set_min(0.0);
+ targets_filler_param.set_max(1.0);
+ UniformFiller<Dtype> targets_filler(targets_filler_param);
+ // Absolute tolerance handed to EXPECT_NEAR below.
+ const Dtype eps = 2e-2;
+ for (int i = 0; i < 100; ++i) {
+ // Fill the data vector
+ data_filler.Fill(this->blob_bottom_data_);
+ // Fill the targets vector
+ targets_filler.Fill(this->blob_bottom_targets_);
+ // A new layer per trial, so every trial goes through SetUp again.
+ SigmoidCrossEntropyLossLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ Dtype layer_loss =
+ layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // Recompute the loss from the same blobs via the reference formula.
+ const int count = this->blob_bottom_data_->count();
+ const int num = this->blob_bottom_data_->num();
+ const Dtype* blob_bottom_data = this->blob_bottom_data_->cpu_data();
+ const Dtype* blob_bottom_targets =
+ this->blob_bottom_targets_->cpu_data();
+ Dtype reference_loss = this->SigmoidCrossEntropyLossReference(
+ count, num, blob_bottom_data, blob_bottom_targets);
+ // The trial index is attached so a failure identifies which fill caused it.
+ EXPECT_NEAR(reference_loss, layer_loss, eps) << "debug: trial #" << i;
+ }
+ }
Blob<Dtype>* const blob_bottom_data_;
Blob<Dtype>* const blob_bottom_targets_;
vector<Blob<Dtype>*> blob_bottom_vec_;
TYPED_TEST_CASE(SigmoidCrossEntropyLossLayerTest, Dtypes);
+TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestSigmoidCrossEntropyLossCPU) {
+ Caffe::set_mode(Caffe::CPU);
+ this->TestForward();
+TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestSigmoidCrossEntropyLossGPU) {
+ Caffe::set_mode(Caffe::GPU);
+ this->TestForward();
TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestGradientCPU) {
LayerParameter layer_param;