mean-variance normalization layer
author    qipeng <pengrobertqi@163.com>
Mon, 4 Aug 2014 01:19:58 +0000 (18:19 -0700)
committer qipeng <pengrobertqi@163.com>
Mon, 11 Aug 2014 16:53:22 +0000 (09:53 -0700)
include/caffe/vision_layers.hpp
src/caffe/layers/mvn_layer.cpp [new file with mode: 0644]
src/caffe/layers/mvn_layer.cu [new file with mode: 0644]
src/caffe/proto/caffe.proto
src/caffe/test/test_mvn_layer.cpp [new file with mode: 0644]

diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 0797065..a6844cd 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -234,6 +234,39 @@ class LRNLayer : public Layer<Dtype> {
   vector<Blob<Dtype>*> product_bottom_vec_;
 };
 
+/* MVNLayer: normalizes each num x channel slice of the bottom blob to
+ * zero mean and unit variance over its height x width elements. */
+template <typename Dtype>
+class MVNLayer : public Layer<Dtype> {
+ public:
+  explicit MVNLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+  virtual inline LayerParameter_LayerType type() const {
+    return LayerParameter_LayerType_MVN;
+  }
+  virtual inline int ExactNumBottomBlobs() const { return 1; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+
+  Blob<Dtype> mean_, variance_, temp_;
+
+  // sum_multiplier_ is a vector of ones, used to carry out sums via BLAS
+  Blob<Dtype> sum_multiplier_;
+};
+
+
 /* PoolingLayer
 */
 template <typename Dtype>
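
For reference, the transform the new layer implements (as read from the forward pass below): within each num x channel slice of the bottom blob, with D = height * width entries x_1, ..., x_D and eps = 1e-10,

\[
\mu = \frac{1}{D}\sum_{i=1}^{D} x_i, \qquad
\sigma^2 = \frac{1}{D}\sum_{i=1}^{D} x_i^2 - \mu^2, \qquad
y_i = \frac{x_i - \mu}{\sqrt{\sigma^2} + \varepsilon}.
\]

Note that eps is added to the standard deviation after the square root, not to the variance.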
diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp
new file mode 100644
index 0000000..7e6a97c
--- /dev/null
+++ b/src/caffe/layers/mvn_layer.cpp
@@ -0,0 +1,151 @@
+// Copyright 2014 BVLC and contributors.
+//
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void MVNLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  Layer<Dtype>::SetUp(bottom, top);
+  (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+      bottom[0]->height(), bottom[0]->width());
+  mean_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+      1, 1);
+  variance_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+      1, 1);
+  temp_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+      bottom[0]->height(), bottom[0]->width());
+  sum_multiplier_.Reshape(1, 1,
+      bottom[0]->height(), bottom[0]->width());
+  Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
+  for (int i = 0; i < sum_multiplier_.count(); ++i) {
+    multiplier_data[i] = 1.;
+  }
+}
+
+template <typename Dtype>
+Dtype MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  int num = bottom[0]->num() * bottom[0]->channels();
+  int dim = bottom[0]->count() / num;
+  Dtype eps = 1e-10;
+
+  // put the squares of bottom into temp_
+  caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
+      temp_.mutable_cpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.cpu_data(),
+      sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data());
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), mean_.cpu_data(),
+      mean_.mutable_cpu_data());  // EX
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), variance_.cpu_data(),
+      variance_.mutable_cpu_data());  // E(X^2)
+  caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
+      temp_.mutable_cpu_data());  // (EX)^2
+  caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
+      variance_.mutable_cpu_data());  // variance
+
+  // do mean and variance normalization
+  // subtract mean
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+          mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+          temp_.mutable_cpu_data());
+
+  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
+
+  // normalize variance
+  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
+        variance_.mutable_cpu_data());
+
+  caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
+
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+        temp_.mutable_cpu_data());
+
+  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
+
+  return Dtype(0);
+}
+
+template <typename Dtype>
+void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->cpu_diff();
+  const Dtype* top_data = top[0]->cpu_data();
+  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+
+  int num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+  int dim = (*bottom)[0]->count() / num;
+  Dtype eps = 1e-10;
+
+  caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
+        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+        mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+        bottom_diff);
+  caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
+
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+          sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+          mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
+          bottom_diff);
+
+  caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
+      bottom_diff);
+
+  // put the squares of bottom into temp_
+  caffe_powx(temp_.count(), bottom_data, Dtype(2),
+      temp_.mutable_cpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.cpu_data(),
+      sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data());
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), mean_.cpu_data(),
+      mean_.mutable_cpu_data());  // EX
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), variance_.cpu_data(),
+      variance_.mutable_cpu_data());  // E(X^2)
+  caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
+      temp_.mutable_cpu_data());  // (EX)^2
+  caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
+      variance_.mutable_cpu_data());  // variance
+
+  // normalize variance
+  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
+        variance_.mutable_cpu_data());
+
+  caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
+
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+      temp_.mutable_cpu_data());
+
+  caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
+}
+
+
+#ifdef CPU_ONLY
+STUB_GPU(MVNLayer);
+#endif
+
+INSTANTIATE_CLASS(MVNLayer);
+
+
+}  // namespace caffe
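
Tracing the BLAS calls in Backward_cpu above, the gradient propagated to the bottom appears to be, for each slice (g the top diff, y the top data, overbars denoting means over the D = height * width entries of the slice):

\[
\frac{\partial L}{\partial x_i}
  = \frac{g_i - \overline{g} - y_i\,\overline{y\,g}}{\sqrt{\sigma^2} + \varepsilon},
\]

i.e. the Jacobian of a zero-mean, unit-variance normalization; the mean and variance are recomputed from the bottom data in the second half of the function.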
diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu
new file mode 100644
index 0000000..3714d49
--- /dev/null
+++ b/src/caffe/layers/mvn_layer.cu
@@ -0,0 +1,129 @@
+// Copyright 2014 BVLC and contributors.
+//
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+Dtype MVNLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = (*top)[0]->mutable_gpu_data();
+  int num = bottom[0]->num() * bottom[0]->channels();
+  int dim = bottom[0]->count() / num;
+
+  // put the squares of bottom into temp_
+  caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2),
+      temp_.mutable_gpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.gpu_data(),
+      sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data());
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), mean_.gpu_data(),
+      mean_.mutable_gpu_data());  // EX
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), variance_.gpu_data(),
+      variance_.mutable_gpu_data());  // E(X^2)
+  caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
+      temp_.mutable_gpu_data());  // (EX)^2
+  caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
+      variance_.mutable_gpu_data());  // variance
+
+  Dtype eps = 1e-10;
+
+  // do mean and variance normalization
+  // subtract mean
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+          mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+          temp_.mutable_gpu_data());
+
+  caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), top_data);
+
+  // normalize variance
+  caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
+        variance_.mutable_gpu_data());
+
+  caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
+
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+        variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+        temp_.mutable_gpu_data());
+
+  caffe_gpu_div(temp_.count(), top_data, temp_.gpu_data(), top_data);
+
+  return Dtype(0);
+}
+
+template <typename Dtype>
+void MVNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->gpu_diff();
+  const Dtype* top_data = top[0]->gpu_data();
+  const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+
+  int num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+  int dim = (*bottom)[0]->count() / num;
+
+  Dtype eps = 1e-10;
+
+  caffe_gpu_mul(temp_.count(), top_data, top_diff, bottom_diff);
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
+        sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+        mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+        bottom_diff);
+  caffe_gpu_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
+
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+          sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+          mean_.gpu_data(), sum_multiplier_.gpu_data(), 1.,
+          bottom_diff);
+
+  caffe_gpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
+      bottom_diff);
+
+  // put the squares of bottom into temp_
+  caffe_gpu_powx(temp_.count(), bottom_data, Dtype(2),
+      temp_.mutable_gpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.gpu_data(),
+      sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data());
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), mean_.gpu_data(),
+      mean_.mutable_gpu_data());  // EX
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), variance_.gpu_data(),
+      variance_.mutable_gpu_data());  // E(X^2)
+  caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
+      temp_.mutable_gpu_data());  // (EX)^2
+  caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
+      variance_.mutable_gpu_data());  // variance
+
+  // normalize variance
+  caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
+        variance_.mutable_gpu_data());
+
+  caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
+
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+      temp_.mutable_gpu_data());
+
+  caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff);
+}
+
+
+INSTANTIATE_CLASS(MVNLayer);
+
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 44bfb63..fce5694 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -201,7 +201,7 @@ message LayerParameter {
   // line above the enum. Update the next available ID when you add a new
   // LayerType.
   //
-  // LayerType next available ID: 34 (last added: SLICE)
+  // LayerType next available ID: 35 (last added: MVN)
   enum LayerType {
     // "NONE" layer type is 0th enum element so that we don't cause confusion
     // by defaulting to an existent LayerType (instead, should usually error if
@@ -228,6 +228,7 @@ message LayerParameter {
     LRN = 15;
     MEMORY_DATA = 29;
     MULTINOMIAL_LOGISTIC_LOSS = 16;
+    MVN = 34;
     POOLING = 17;
     POWER = 26;
     RELU = 18;
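
With the enum value registered, a network could instantiate the layer with a prototxt stanza along these lines (a minimal sketch; the layer and blob names are illustrative only):

layers {
  name: "mvn1"
  type: MVN
  bottom: "data"
  top: "data_mvn"
}

The layer takes exactly one bottom and one top blob and adds no parameters of its own.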
diff --git a/src/caffe/test/test_mvn_layer.cpp b/src/caffe/test/test_mvn_layer.cpp
new file mode 100644
index 0000000..d5bea3f
--- /dev/null
+++ b/src/caffe/test/test_mvn_layer.cpp
@@ -0,0 +1,85 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <cmath>
+#include <cstring>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class MVNLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+ protected:
+  MVNLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 3, 4, 5)),
+        blob_top_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~MVNLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(MVNLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(MVNLayerTest, TestForward) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  MVNLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Test mean
+  int num = this->blob_bottom_->num();
+  int channels = this->blob_bottom_->channels();
+  int height = this->blob_bottom_->height();
+  int width = this->blob_bottom_->width();
+
+  for (int i = 0; i < num; ++i) {
+    for (int j = 0; j < channels; ++j) {
+      Dtype sum = 0, var = 0;
+      for (int k = 0; k < height; ++k) {
+        for (int l = 0; l < width; ++l) {
+          Dtype data = this->blob_top_->data_at(i, j, k, l);
+          sum += data;
+          var += data * data;
+        }
+      }
+      sum /= height * width;
+      var /= height * width;
+
+      // expect zero mean
+      EXPECT_GE(sum, -0.001);
+      EXPECT_LE(sum, 0.001);
+      // expect unit variance
+      EXPECT_GE(var, 0.999);
+      EXPECT_LE(var, 1.001);
+    }
+  }
+}
+
+TYPED_TEST(MVNLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  MVNLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
+}  // namespace caffe