int count_;
};
+/* MVNLayer
+   Normalizes the input to zero mean and, optionally, unit variance,
+   either per channel or across the channels of each example.
+ */
+template <typename Dtype>
+class MVNLayer : public Layer<Dtype> {
+ public:
+ explicit MVNLayer(const LayerParameter& param)
+ : Layer<Dtype>(param) {}
+ virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top);
+
+ virtual inline LayerParameter_LayerType type() const {
+ return LayerParameter_LayerType_MVN;
+ }
+ virtual inline int ExactNumBottomBlobs() const { return 1; }
+ virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+ virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top);
+ virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top);
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+
+ Blob<Dtype> mean_, variance_, temp_;
+
+ // sum_multiplier_ is a vector of ones used to carry out sums via BLAS
+ Blob<Dtype> sum_multiplier_;
+};
+
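For reference, the transform this layer implements maps each length-dim row x to y = (x - mean(x)) / (sqrt(var(x)) + eps), with the division skipped when normalize_variance is false. A minimal scalar sketch of the same computation, independent of Caffe (function and variable names here are illustrative):

#include <cmath>
#include <vector>

// Illustrative single-row version of the MVN forward pass.
void mvn_row(const std::vector<float>& x, std::vector<float>* y,
             bool normalize_variance, float eps = 1e-10f) {
  const int dim = static_cast<int>(x.size());
  float mean = 0.0f, sq = 0.0f;
  for (int i = 0; i < dim; ++i) {
    mean += x[i];
    sq += x[i] * x[i];
  }
  mean /= dim;
  sq /= dim;
  const float var = sq - mean * mean;  // var(X) = E(X^2) - (EX)^2
  // eps is added after the square root, matching the layer code below.
  const float denom = normalize_variance ? std::sqrt(var) + eps : 1.0f;
  y->resize(dim);
  for (int i = 0; i < dim; ++i) {
    (*y)[i] = (x[i] - mean) / denom;
  }
}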
/* SoftmaxLayer
*/
template <typename Dtype>
vector<Blob<Dtype>*> product_bottom_vec_;
};
-/* MVNLayer
- */
-template <typename Dtype>
-class MVNLayer : public Layer<Dtype> {
- public:
- explicit MVNLayer(const LayerParameter& param)
- : Layer<Dtype>(param) {}
- virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top);
-
- virtual inline LayerParameter_LayerType type() const {
- return LayerParameter_LayerType_MVN;
- }
- virtual inline int ExactNumBottomBlobs() const { return 1; }
- virtual inline int ExactNumTopBlobs() const { return 1; }
-
- protected:
- virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top);
- virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top);
- virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
- virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
-
- Blob<Dtype> mean_, variance_, temp_;
-
- // sum_multiplier is just used to carry out sum using blas
- Blob<Dtype> sum_multiplier_;
-};
-
/* PoolingLayer
*/
return new LRNLayer<Dtype>(param);
case LayerParameter_LayerType_MEMORY_DATA:
return new MemoryDataLayer<Dtype>(param);
+ case LayerParameter_LayerType_MVN:
+ return new MVNLayer<Dtype>(param);
case LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS:
return new MultinomialLogisticLossLayer<Dtype>(param);
case LayerParameter_LayerType_POOLING:
-// Copyright 2014 BVLC and contributors.
-//
#include <algorithm>
#include <vector>
+#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
- sum_multiplier_.Reshape(1, 1,
- bottom[0]->height(), bottom[0]->width());
+ sum_multiplier_.Reshape(1, bottom[0]->channels(),
+ bottom[0]->height(), bottom[0]->width());
Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
- for (int i = 0; i < sum_multiplier_.count(); ++i) {
- multiplier_data[i] = 1.;
- }
+ caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data);
}
template <typename Dtype>
vector<Blob<Dtype>*>* top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
- int num = bottom[0]->num() * bottom[0]->channels();
+ int num;
+ if (this->layer_param_.mvn_param().across_channels())
+ num = bottom[0]->num();
+ else
+ num = bottom[0]->num() * bottom[0]->channels();
+
int dim = bottom[0]->count() / num;
Dtype eps = 1e-10;
- // put the squares of bottom into temp_
- caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
- temp_.mutable_cpu_data());
-
- // computes variance using var(X) = E(X^2) - (EX)^2
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
- sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
- sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data()); // E(X^2)
- caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
- temp_.mutable_cpu_data()); // (EX)^2
- caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
- variance_.mutable_cpu_data()); // variance
-
- // do mean and variance normalization
- // subtract mean
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
- mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
- temp_.mutable_cpu_data());
+ if (this->layer_param_.mvn_param().normalize_variance()) {
+ // put the squares of bottom into temp_
+ caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
+ temp_.mutable_cpu_data());
- caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
+ // computes variance using var(X) = E(X^2) - (EX)^2
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
+ sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
+ sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data()); // E(X^2)
+ caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
+ temp_.mutable_cpu_data()); // (EX)^2
+ caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
+ variance_.mutable_cpu_data()); // variance
- // normalize variance
- caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
- variance_.mutable_cpu_data());
+ // do mean and variance normalization
+ // subtract mean
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+ temp_.mutable_cpu_data());
- caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
+ caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
- temp_.mutable_cpu_data());
+ // normalize variance
+ caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
+ variance_.mutable_cpu_data());
+
+ caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
+
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+ temp_.mutable_cpu_data());
+
+ caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
+ } else {
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
+ sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX
- caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
+ // subtract mean
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+ temp_.mutable_cpu_data());
+
+ caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
+ }
return Dtype(0);
}
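The forward pass above never loops over elements: multiplying by the all-ones sum_multiplier_ turns a GEMV into a row-sum (and, with the 1/dim scaling, a row-mean), while the rank-1 GEMM against the same ones vector broadcasts a per-row value back across each row. A standalone sketch of the GEMV half of this trick using raw CBLAS rather than Caffe's wrappers (names here are illustrative):

#include <cblas.h>
#include <cstdio>
#include <vector>

// Row means of a num x dim row-major matrix via one GEMV with a ones
// vector, mirroring how MVNLayer uses sum_multiplier_.
int main() {
  const int num = 2, dim = 4;
  std::vector<float> x = {1, 2, 3, 4,
                          5, 6, 7, 8};
  std::vector<float> ones(dim, 1.0f);  // plays the role of sum_multiplier_
  std::vector<float> mean(num, 0.0f);
  // mean = (1/dim) * x * ones
  cblas_sgemv(CblasRowMajor, CblasNoTrans, num, dim, 1.0f / dim,
              x.data(), dim, ones.data(), 1, 0.0f, mean.data(), 1);
  std::printf("%f %f\n", mean[0], mean[1]);  // expect 2.5 and 6.5
  return 0;
}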
const Dtype* bottom_data = (*bottom)[0]->cpu_data();
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- int num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+ int num;
+ if (this->layer_param_.mvn_param().across_channels())
+ num = (*bottom)[0]->num();
+ else
+ num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+
int dim = (*bottom)[0]->count() / num;
Dtype eps = 1e-10;
- caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
- sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
- bottom_diff);
- caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
-
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+ if (this->layer_param_.mvn_param().normalize_variance()) {
+ caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
bottom_diff);
+ caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
- caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
- bottom_diff);
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+ sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
+ bottom_diff);
- // put the squares of bottom into temp_
- caffe_powx(temp_.count(), bottom_data, Dtype(2),
- temp_.mutable_cpu_data());
+ caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
+ bottom_diff);
- // computes variance using var(X) = E(X^2) - (EX)^2
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
- sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
- sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data()); // E(X^2)
- caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
- temp_.mutable_cpu_data()); // (EX)^2
- caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
- variance_.mutable_cpu_data()); // variance
+ // put the squares of bottom into temp_
+ caffe_powx(temp_.count(), bottom_data, Dtype(2),
+ temp_.mutable_cpu_data());
- // normalize variance
- caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
- variance_.mutable_cpu_data());
+ // computes variance using var(X) = E(X^2) - (EX)^2
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
+ sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
+ sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data()); // E(X^2)
+ caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
+ temp_.mutable_cpu_data()); // (EX)^2
+ caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
+ variance_.mutable_cpu_data()); // variance
- caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
+ // normalize variance
+ caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
+ variance_.mutable_cpu_data());
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
- temp_.mutable_cpu_data());
+ caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
- caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+ temp_.mutable_cpu_data());
+
+ caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
+ } else {
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
+ sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
+ temp_.mutable_cpu_data());
+ caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
+ }
}
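A note on the mean-only backward pass: since y_i = x_i - (1/dim) * sum_j x_j, the chain rule gives dL/dx_i = dL/dy_i - (1/dim) * sum_j dL/dy_j, so the correct gradient is the top diff with its per-row mean subtracted, not a plain copy. The gemv/gemm pair in the else branch above computes exactly that: the gemv forms the per-row mean of top_diff and the gemm broadcasts its negation before the final add.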
-// Copyright 2014 BVLC and contributors.
-//
#include <algorithm>
#include <vector>
+#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
vector<Blob<Dtype>*>* top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
- int num = bottom[0]->num() * bottom[0]->channels();
- int dim = bottom[0]->count() / num;
-
- // put the squares of bottom into temp_
- caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2),
- temp_.mutable_gpu_data());
-
- // computes variance using var(X) = E(X^2) - (EX)^2
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
- sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.gpu_data(),
- sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data()); // E(X^2)
- caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
- temp_.mutable_gpu_data()); // (EX)^2
- caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
- variance_.mutable_gpu_data()); // variance
-
- Dtype eps = 1e-10;
-
- // do mean and variance normalization
- // subtract mean
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
- mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
- temp_.mutable_gpu_data());
+ int num;
+ if (this->layer_param_.mvn_param().across_channels())
+ num = bottom[0]->num();
+ else
+ num = bottom[0]->num() * bottom[0]->channels();
- caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), top_data);
-
- // normalize variance
- caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
- variance_.mutable_gpu_data());
-
- caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
+ int dim = bottom[0]->count() / num;
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+ if (this->layer_param_.mvn_param().normalize_variance()) {
+ // put the squares of bottom into temp_
+ caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2),
temp_.mutable_gpu_data());
-
- caffe_gpu_div(temp_.count(), top_data, temp_.gpu_data(), top_data);
+
+ // computes variance using var(X) = E(X^2) - (EX)^2
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
+ sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.gpu_data(),
+ sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data()); // E(X^2)
+ caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
+ temp_.mutable_gpu_data()); // (EX)^2
+ caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
+ variance_.mutable_gpu_data()); // variance
+
+ Dtype eps = 1e-10;
+
+ // do mean and variance normalization
+ // subtract mean
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+ temp_.mutable_gpu_data());
+
+ caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), top_data);
+
+ // normalize variance
+ caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
+ variance_.mutable_gpu_data());
+
+ caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
+
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+ temp_.mutable_gpu_data());
+
+ caffe_gpu_div(temp_.count(), top_data, temp_.gpu_data(), top_data);
+ } else {
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
+ sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX
+
+ // subtract mean
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+ temp_.mutable_gpu_data());
+
+ caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), top_data);
+ }
return Dtype(0);
}
const Dtype* bottom_data = (*bottom)[0]->gpu_data();
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
- int num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+ int num;
+ if (this->layer_param_.mvn_param().across_channels())
+ num = (*bottom)[0]->num();
+ else
+ num = (*bottom)[0]->num() * (*bottom)[0]->channels();
+
int dim = (*bottom)[0]->count() / num;
Dtype eps = 1e-10;
- caffe_gpu_mul(temp_.count(), top_data, top_diff, bottom_diff);
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
- sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
- bottom_diff);
- caffe_gpu_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
-
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+ if (this->layer_param_.mvn_param().normalize_variance()) {
+ caffe_gpu_mul(temp_.count(), top_data, top_diff, bottom_diff);
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- mean_.gpu_data(), sum_multiplier_.gpu_data(), 1.,
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
bottom_diff);
-
- caffe_gpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
- bottom_diff);
-
- // put the squares of bottom into temp_
- caffe_gpu_powx(temp_.count(), bottom_data, Dtype(2),
- temp_.mutable_gpu_data());
-
- // computes variance using var(X) = E(X^2) - (EX)^2
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
- sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.gpu_data(),
- sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data()); // E(X^2)
- caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
- temp_.mutable_gpu_data()); // (EX)^2
- caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
- variance_.mutable_gpu_data()); // variance
-
- // normalize variance
- caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
- variance_.mutable_gpu_data());
-
- caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
-
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
- variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
- temp_.mutable_gpu_data());
-
- caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff);
+ caffe_gpu_mul(temp_.count(), top_data, bottom_diff, bottom_diff);
+
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
+ sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ mean_.gpu_data(), sum_multiplier_.gpu_data(), 1.,
+ bottom_diff);
+
+ caffe_gpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
+ bottom_diff);
+
+ // put the squares of bottom into temp_
+ caffe_gpu_powx(temp_.count(), bottom_data, Dtype(2),
+ temp_.mutable_gpu_data());
+
+ // computes variance using var(X) = E(X^2) - (EX)^2
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
+ sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.gpu_data(),
+ sum_multiplier_.gpu_data(), 0., variance_.mutable_gpu_data()); // E(X^2)
+ caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2),
+ temp_.mutable_gpu_data()); // (EX)^2
+ caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(),
+ variance_.mutable_gpu_data()); // variance
+
+ // normalize variance
+ caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
+ variance_.mutable_gpu_data());
+
+ caffe_gpu_add_scalar(variance_.count(), eps, variance_.mutable_gpu_data());
+
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
+ variance_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+ temp_.mutable_gpu_data());
+
+ caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff);
+ } else {
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
+ sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data());
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ mean_.gpu_data(), sum_multiplier_.gpu_data(), 0.,
+ temp_.mutable_gpu_data());
+ caffe_gpu_add(temp_.count(), top_diff, temp_.gpu_data(), bottom_diff);
+ }
}
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
-// LayerParameter next available ID: 34 (last added: exclude)
+// LayerParameter next available ID: 35 (last added: mvn_param)
message LayerParameter {
repeated string bottom = 2; // the name of the bottom blobs
repeated string top = 3; // the name of the top blobs
optional ThresholdParameter threshold_param = 25;
optional HingeLossParameter hinge_loss_param = 29;
optional SliceParameter slice_param = 31;
+ optional MVNParameter mvn_param = 34;
// DEPRECATED: The layer parameters specified as a V0LayerParameter.
// This should never be used by any code except to upgrade to the new
optional uint32 width = 4;
}
+// Message that stores parameters used by MVNLayer
+message MVNParameter {
+ // If false, normalize the mean only, skipping variance normalization.
+ optional bool normalize_variance = 1 [default = true];
+
+ // If true, compute one mean and variance across all channels of each
+ // example (DNN-style MVN) rather than one per channel.
+ optional bool across_channels = 2 [default = false];
+}
+
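As a usage sketch, the new layer could be configured in a net prototxt like this (layer and blob names are illustrative):

layers {
  name: "mvn1"
  type: MVN
  bottom: "data"
  top: "data_mvn"
  mvn_param {
    normalize_variance: false
    across_channels: true
  }
}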
// Message that stores parameters used by PoolingLayer
message PoolingParameter {
enum PoolMethod {
-// Copyright 2014 BVLC and contributors.
-
#include <cmath>
#include <cstring>
#include <vector>
-#include "gtest/gtest.h"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
#include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
-#include "caffe/test/test_gradient_check_util.hpp"
+#include "gtest/gtest.h"
#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
namespace caffe {
}
}
+TYPED_TEST(MVNLayerTest, TestForwardMeanOnly) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ CHECK(google::protobuf::TextFormat::ParseFromString(
+ "mvn_param{normalize_variance: false}", &layer_param));
+ MVNLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // Test mean
+ int num = this->blob_bottom_->num();
+ int channels = this->blob_bottom_->channels();
+ int height = this->blob_bottom_->height();
+ int width = this->blob_bottom_->width();
+
+ for (int i = 0; i < num; ++i) {
+ for (int j = 0; j < channels; ++j) {
+ Dtype sum = 0;
+ for (int k = 0; k < height; ++k) {
+ for (int l = 0; l < width; ++l) {
+ sum += this->blob_top_->data_at(i, j, k, l);
+ }
+ }
+ sum /= height * width;
+
+ const Dtype kErrorBound = 0.001;
+ // expect zero mean
+ EXPECT_NEAR(0, sum, kErrorBound);
+ }
+ }
+}
+
+TYPED_TEST(MVNLayerTest, TestForwardAcrossChannels) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ CHECK(google::protobuf::TextFormat::ParseFromString(
+ "mvn_param{across_channels: true}", &layer_param));
+ MVNLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // Test mean
+ int num = this->blob_bottom_->num();
+ int channels = this->blob_bottom_->channels();
+ int height = this->blob_bottom_->height();
+ int width = this->blob_bottom_->width();
+
+ for (int i = 0; i < num; ++i) {
+ Dtype sum = 0, var = 0;
+ for (int j = 0; j < channels; ++j) {
+ for (int k = 0; k < height; ++k) {
+ for (int l = 0; l < width; ++l) {
+ Dtype data = this->blob_top_->data_at(i, j, k, l);
+ sum += data;
+ var += data * data;
+ }
+ }
+ }
+ sum /= height * width * channels;
+ var /= height * width * channels;
+
+ const Dtype kErrorBound = 0.001;
+ // expect zero mean
+ EXPECT_NEAR(0, sum, kErrorBound);
+ // expect unit variance
+ EXPECT_NEAR(1, var, kErrorBound);
+ }
+}
+
TYPED_TEST(MVNLayerTest, TestGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
&(this->blob_top_vec_));
}
+TYPED_TEST(MVNLayerTest, TestGradientMeanOnly) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ CHECK(google::protobuf::TextFormat::ParseFromString(
+ "mvn_param{normalize_variance: false}", &layer_param));
+ MVNLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+ &(this->blob_top_vec_));
+}
+
+TYPED_TEST(MVNLayerTest, TestGradientAcrossChannels) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ CHECK(google::protobuf::TextFormat::ParseFromString(
+ "mvn_param{across_channels: true}", &layer_param));
+ MVNLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+ &(this->blob_top_vec_));
+}
+
} // namespace caffe