From b04aa0036979d0c34ef96c150abb34e21bdf1a7b Mon Sep 17 00:00:00 2001
From: qipeng
Date: Sun, 3 Aug 2014 18:19:58 -0700
Subject: [PATCH] mean-variance normalization layer

---
 include/caffe/vision_layers.hpp   |  33 ++++++++
 src/caffe/layers/mvn_layer.cpp    | 159 ++++++++++++++++++++++++++++++++++++++
 src/caffe/layers/mvn_layer.cu     | 131 ++++++++++++++++++++++++++++
 src/caffe/proto/caffe.proto       |   3 +-
 src/caffe/test/test_mvn_layer.cpp |  85 +++++++++++++++++++++
 5 files changed, 410 insertions(+), 1 deletion(-)
 create mode 100644 src/caffe/layers/mvn_layer.cpp
 create mode 100644 src/caffe/layers/mvn_layer.cu
 create mode 100644 src/caffe/test/test_mvn_layer.cpp

diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 0797065..a6844cd 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -234,6 +234,39 @@ class LRNLayer : public Layer<Dtype> {
   vector<Blob<Dtype>*> product_bottom_vec_;
 };
 
+/* MVNLayer
+ */
+template <typename Dtype>
+class MVNLayer : public Layer<Dtype> {
+ public:
+  explicit MVNLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+  virtual inline LayerParameter_LayerType type() const {
+    return LayerParameter_LayerType_MVN;
+  }
+  virtual inline int ExactNumBottomBlobs() const { return 1; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+
+  Blob<Dtype> mean_, variance_, temp_;
+
+  // sum_multiplier is a vector of ones, used to carry out sums via BLAS
+  Blob<Dtype> sum_multiplier_;
+};
+
+
 /* PoolingLayer
 */
 template <typename Dtype>
diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp
new file mode 100644
index 0000000..7e6a97c
--- /dev/null
+++ b/src/caffe/layers/mvn_layer.cpp
@@ -0,0 +1,159 @@
+// Copyright 2014 BVLC and contributors.
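+// Mean-variance normalization (MVN): for each example and each channel,
+// subtract the mean over the spatial dimensions and divide by the standard
+// deviation (plus a small eps for numerical stability), so that every
+// (num, channel) slice of the output has zero mean and unit variance.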
+//
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void MVNLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  Layer<Dtype>::SetUp(bottom, top);
+  (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+      bottom[0]->height(), bottom[0]->width());
+  mean_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+      1, 1);
+  variance_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+      1, 1);
+  temp_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+      bottom[0]->height(), bottom[0]->width());
+  sum_multiplier_.Reshape(1, 1,
+      bottom[0]->height(), bottom[0]->width());
+  Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
+  for (int i = 0; i < sum_multiplier_.count(); ++i) {
+    multiplier_data[i] = 1.;
+  }
+}
+
+template <typename Dtype>
+Dtype MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  int num = bottom[0]->num() * bottom[0]->channels();
+  int dim = bottom[0]->count() / num;
+  Dtype eps = 1e-10;
+
+  // put the squares of bottom into temp_
+  caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
+      temp_.mutable_cpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.cpu_data(),
+      sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data());
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), mean_.cpu_data(),
+      mean_.mutable_cpu_data());  // EX
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), variance_.cpu_data(),
+      variance_.mutable_cpu_data());  // E(X^2)
+  caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
+      temp_.mutable_cpu_data());  // (EX)^2
+  caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
+      variance_.mutable_cpu_data());  // variance
+
+  // do mean and variance normalization
+  // subtract mean
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+      temp_.mutable_cpu_data());
+
+  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
+
+  // normalize variance
+  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
+      variance_.mutable_cpu_data());
+
+  caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
+
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+      temp_.mutable_cpu_data());
+
+  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
+
+  return Dtype(0);
+}
+
+template <typename Dtype>
+void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->cpu_diff();
+  const Dtype* top_data = top[0]->cpu_data();
+  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+
+  int num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+  int dim = (*bottom)[0]->count() / num;
+  Dtype eps = 1e-10;
+
+  caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
+      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+      bottom_diff);
+  caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
+
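+  // At this point bottom_diff holds y .* sum(y .* dE/dy), with the sum taken
+  // (and broadcast) over each (num, channel) slice; the steps below add
+  // sum(dE/dy) and combine the terms into
+  //   dE/dx = (dE/dy - mean(dE/dy) - y .* mean(y .* dE/dy)) / (std + eps)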
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
+      bottom_diff);
+
+  caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
+      bottom_diff);
+
+  // put the squares of bottom into temp_
+  caffe_powx(temp_.count(), bottom_data, Dtype(2),
+      temp_.mutable_cpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.cpu_data(),
+      sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data());
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), mean_.cpu_data(),
+      mean_.mutable_cpu_data());  // EX
+  caffe_cpu_scale(mean_.count(), Dtype(1. / dim), variance_.cpu_data(),
+      variance_.mutable_cpu_data());  // E(X^2)
+  caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
+      temp_.mutable_cpu_data());  // (EX)^2
+  caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
+      variance_.mutable_cpu_data());  // variance
+
+  // normalize variance
+  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
+      variance_.mutable_cpu_data());
+
+  caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
+
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+      temp_.mutable_cpu_data());
+
+  caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
+}
+
+
+#ifdef CPU_ONLY
+STUB_GPU(MVNLayer);
+#endif
+
+INSTANTIATE_CLASS(MVNLayer);
+
+
+}  // namespace caffe
diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu
new file mode 100644
index 0000000..3714d49
--- /dev/null
+++ b/src/caffe/layers/mvn_layer.cu
@@ -0,0 +1,131 @@
+// Copyright 2014 BVLC and contributors.
+//
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+Dtype MVNLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = (*top)[0]->mutable_gpu_data();
+  int num = bottom[0]->num() * bottom[0]->channels();
+  int dim = bottom[0]->count() / num;
+
+  // put the squares of bottom into temp_
+  caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2),
+      temp_.mutable_gpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.gpu_data(),
+      sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data());
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), mean_.gpu_data(),
+      mean_.mutable_gpu_data());  // EX
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), variance_.gpu_data(),
+      variance_.mutable_gpu_data());  // E(X^2)
+  caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
+      temp_.mutable_gpu_data());  // (EX)^2
+  caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
+      variance_.mutable_gpu_data());  // variance
+
+  Dtype eps = 1e-10;
+
+  // do mean and variance normalization
+  // subtract mean
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+      mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+      temp_.mutable_gpu_data());
+
+  caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), top_data);
+
+  // normalize variance
+  caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
+      variance_.mutable_gpu_data());
+
+  caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
+
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+      temp_.mutable_gpu_data());
+
+  caffe_gpu_div(temp_.count(), top_data, temp_.gpu_data(), top_data);
+
+  return Dtype(0);
+}
+
+template <typename Dtype>
+void MVNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->gpu_diff();
+  const Dtype* top_data = top[0]->gpu_data();
+  const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+
+  int num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+  int dim = (*bottom)[0]->count() / num;
+
+  Dtype eps = 1e-10;
+
+  caffe_gpu_mul(temp_.count(), top_data, top_diff, bottom_diff);
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
+      sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+      bottom_diff);
+  caffe_gpu_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
+
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+      sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      mean_.gpu_data(), sum_multiplier_.gpu_data(), 1.,
+      bottom_diff);
+
+  caffe_gpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
+      bottom_diff);
+
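+  // mean_ was used as scratch space above, so the statistics are recomputed
+  // from the bottom data before the final division by (std + eps).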
+  // put the squares of bottom into temp_
+  caffe_gpu_powx(temp_.count(), bottom_data, Dtype(2),
+      temp_.mutable_gpu_data());
+
+  // computes variance using var(X) = E(X^2) - (EX)^2
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_data,
+      sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., temp_.gpu_data(),
+      sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data());
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), mean_.gpu_data(),
+      mean_.mutable_gpu_data());  // EX
+  caffe_gpu_scale(mean_.count(), Dtype(1. / dim), variance_.gpu_data(),
+      variance_.mutable_gpu_data());  // E(X^2)
+  caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
+      temp_.mutable_gpu_data());  // (EX)^2
+  caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
+      variance_.mutable_gpu_data());  // variance
+
+  // normalize variance
+  caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
+      variance_.mutable_gpu_data());
+
+  caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
+
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+      variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+      temp_.mutable_gpu_data());
+
+  caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff);
+}
+
+
+INSTANTIATE_CLASS(MVNLayer);
+
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 44bfb63..fce5694 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -201,7 +201,7 @@ message LayerParameter {
   // line above the enum. Update the next available ID when you add a new
   // LayerType.
   //
-  // LayerType next available ID: 34 (last added: SLICE)
+  // LayerType next available ID: 35 (last added: MVN)
   enum LayerType {
     // "NONE" layer type is 0th enum element so that we don't cause confusion
     // by defaulting to an existent LayerType (instead, should usually error if
@@ -228,6 +228,7 @@ message LayerParameter {
     LRN = 15;
     MEMORY_DATA = 29;
     MULTINOMIAL_LOGISTIC_LOSS = 16;
+    MVN = 34;
     POOLING = 17;
     POWER = 26;
     RELU = 18;
diff --git a/src/caffe/test/test_mvn_layer.cpp b/src/caffe/test/test_mvn_layer.cpp
new file mode 100644
index 0000000..d5bea3f
--- /dev/null
+++ b/src/caffe/test/test_mvn_layer.cpp
@@ -0,0 +1,85 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <cmath>
+#include <cstring>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class MVNLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+ protected:
+  MVNLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 3, 4, 5)),
+        blob_top_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~MVNLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(MVNLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(MVNLayerTest, TestForward) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  MVNLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Test mean
+  int num = this->blob_bottom_->num();
+  int channels = this->blob_bottom_->channels();
+  int height = this->blob_bottom_->height();
+  int width = this->blob_bottom_->width();
+
+  for (int i = 0; i < num; ++i) {
+    for (int j = 0; j < channels; ++j) {
+      Dtype sum = 0, var = 0;
+      for (int k = 0; k < height; ++k) {
+        for (int l = 0; l < width; ++l) {
+          Dtype data = this->blob_top_->data_at(i, j, k, l);
+          sum += data;
+          var += data * data;
+        }
+      }
+      sum /= height * width;
+      var /= height * width;
+
+      // expect zero mean
+      EXPECT_GE(sum, -0.001);
+      EXPECT_LE(sum, 0.001);
+      // expect unit variance
+      EXPECT_GE(var, 0.999);
+      EXPECT_LE(var, 1.001);
+    }
+  }
+}
+
+TYPED_TEST(MVNLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  MVNLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
+}  // namespace caffe
-- 
2.7.4
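
Usage note: the patch adds no MVN-specific message to LayerParameter, so a net
definition only needs the new layer type. A minimal prototxt sketch (the blob
names "data" and "data_mvn" are illustrative, not taken from this patch):

    layers {
      name: "mvn1"
      type: MVN
      bottom: "data"
      top: "data_mvn"
    }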