MVNLayer fixes.
authorJ Yegerlehner <jyegerlehner@yahoo.com>
Sun, 23 Aug 2015 23:57:16 +0000 (18:57 -0500)
committerJ Yegerlehner <jyegerlehner@yahoo.com>
Wed, 26 Aug 2015 06:11:16 +0000 (01:11 -0500)
Fix the MVNLayer tests so they actually test what they claim.

MVNLayer fixes: sum_multiplier_ sized correctly; backward gradient calculation.

Gradient calculation per analysis of seanbell, found here:
https://github.com/BVLC/caffe/issues/1938

Fixes according to review comments.

src/caffe/layers/mvn_layer.cpp
src/caffe/layers/mvn_layer.cu
src/caffe/test/test_mvn_layer.cpp

index 3e79bdd..325691b 100644 (file)
@@ -18,8 +18,12 @@ void MVNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
       1, 1);
   temp_.Reshape(bottom[0]->num(), bottom[0]->channels(),
       bottom[0]->height(), bottom[0]->width());
-  sum_multiplier_.Reshape(1, 1,
-      bottom[0]->height(), bottom[0]->width());
+  if ( this->layer_param_.mvn_param().across_channels() ) {
+    sum_multiplier_.Reshape(1, bottom[0]->channels(), bottom[0]->height(),
+                            bottom[0]->width());
+  } else {
+    sum_multiplier_.Reshape(1, 1, bottom[0]->height(), bottom[0]->width());
+  }
   Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
   caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data);
   eps_ = this->layer_param_.mvn_param().eps();
@@ -130,7 +134,12 @@ void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
 
     caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
   } else {
-    caffe_copy(temp_.count(), top_diff, bottom_diff);
+    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
+      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+      temp_.mutable_cpu_data());
+    caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
   }
 }
 
index 3888a0c..d86a2e7 100644 (file)
@@ -113,7 +113,12 @@ void MVNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
 
     caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff);
   } else {
-    caffe_copy(temp_.count(), top_diff, bottom_diff);
+    caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
+            sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+            mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+            temp_.mutable_gpu_data());
+    caffe_gpu_add(temp_.count(), top_diff, temp_.gpu_data(), bottom_diff);
   }
 }
 
index 933b432..be23d86 100644 (file)
@@ -6,6 +6,7 @@
 #include "caffe/common.hpp"
 #include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
+#include "google/protobuf/text_format.h"
 #include "gtest/gtest.h"
 
 #include "caffe/test/test_caffe_main.hpp"
@@ -73,7 +74,8 @@ TYPED_TEST(MVNLayerTest, TestForward) {
 TYPED_TEST(MVNLayerTest, TestForwardMeanOnly) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  layer_param.ParseFromString("mvn_param{normalize_variance: false}");
+  CHECK(google::protobuf::TextFormat::ParseFromString(
+      "mvn_param{normalize_variance: false}", &layer_param));
   MVNLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
   layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
@@ -105,7 +107,8 @@ TYPED_TEST(MVNLayerTest, TestForwardMeanOnly) {
 TYPED_TEST(MVNLayerTest, TestForwardAcrossChannels) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  layer_param.ParseFromString("mvn_param{across_channels: true}");
+  CHECK(google::protobuf::TextFormat::ParseFromString(
+      "mvn_param{across_channels: true}", &layer_param));
   MVNLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
   layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
@@ -149,7 +152,8 @@ TYPED_TEST(MVNLayerTest, TestGradient) {
 TYPED_TEST(MVNLayerTest, TestGradientMeanOnly) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  layer_param.ParseFromString("mvn_param{normalize_variance: false}");
+  CHECK(google::protobuf::TextFormat::ParseFromString(
+      "mvn_param{normalize_variance: false}", &layer_param));
   MVNLayer<Dtype> layer(layer_param);
   GradientChecker<Dtype> checker(1e-2, 1e-3);
   checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
@@ -159,7 +163,8 @@ TYPED_TEST(MVNLayerTest, TestGradientMeanOnly) {
 TYPED_TEST(MVNLayerTest, TestGradientAcrossChannels) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  layer_param.ParseFromString("mvn_param{across_channels: true}");
+  CHECK(google::protobuf::TextFormat::ParseFromString(
+      "mvn_param{across_channels: true}", &layer_param));
   MVNLayer<Dtype> layer(layer_param);
   GradientChecker<Dtype> checker(1e-2, 1e-3);
   checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,