From bb5bf436c10dee68a31825edf65f39149034c4e0 Mon Sep 17 00:00:00 2001 From: Takuya Narihira Date: Mon, 16 Feb 2015 09:52:47 -0800 Subject: [PATCH] PReLU Layer and its tests described in Kaiming He et al, "Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification", arxiv 2015. Belows are commit message histories that I had while developing. PReLULayer takes FillerParameter for init PReLU testing consistency with ReLU Fix : PReLU test concistency check PReLU tests in-place computation, and it failed in GPU Fix: PReLU in-place backward in GPU PReLULayer called an incorrect API for copying data (caffe_gpu_memcpy). First argment of `caffe_gpu_memcpy` should be size of memory region in byte. I modified to use `caffe_copy` function. Fix: style errors Fix: number of axes of input blob must be >= 2 Use 1D blob, zero-D blob. Rename: hw -> dim --- include/caffe/neuron_layers.hpp | 84 +++++++++++++++ src/caffe/layers/prelu_layer.cpp | 140 +++++++++++++++++++++++++ src/caffe/layers/prelu_layer.cu | 130 +++++++++++++++++++++++ src/caffe/proto/caffe.proto | 14 ++- src/caffe/test/test_neuron_layer.cpp | 196 +++++++++++++++++++++++++++++++++++ 5 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 src/caffe/layers/prelu_layer.cpp create mode 100644 src/caffe/layers/prelu_layer.cu diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp index 0c306fb..8669923 100644 --- a/include/caffe/neuron_layers.hpp +++ b/include/caffe/neuron_layers.hpp @@ -654,6 +654,90 @@ class ThresholdLayer : public NeuronLayer { Dtype threshold_; }; +/** + * @brief Parameterized Rectified Linear Unit non-linearity @f$ + * y_i = \max(0, x_i) + a_i \min(0, x_i) + * @f$. The differences from ReLULayer are 1) negative slopes are + * learnable though backprop and 2) negative slopes can vary across + * channels. The number of axes of input blob should be greater than or + * equal to 2. The 1st axis (0-based) is seen as channels. + */ +template +class PReLULayer : public NeuronLayer { + public: + /** + * @param param provides PReLUParameter prelu_param, + * with PReLULayer options: + * - filler (\b optional, FillerParameter, + * default {'type': constant 'value':0.25}). + * - channel_shared (\b optional, default false). + * negative slopes are shared across channels. + */ + explicit PReLULayer(const LayerParameter& param) + : NeuronLayer(param) {} + + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "PReLU"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the computed outputs for each channel @f$i@f$ @f$ + * y_i = \max(0, x_i) + a_i \min(0, x_i) + * @f$. + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the PReLU inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times ...) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their + * diff with gradients @f$ + * \frac{\partial E}{\partial x_i} = \left\{ + * \begin{array}{lr} + * a_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\ + * \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i > 0 + * \end{array} \right. + * @f$. + * If param_propagate_down_[0] is true, it fills the diff with gradients + * @f$ + * \frac{\partial E}{\partial a_i} = \left\{ + * \begin{array}{lr} + * \sum_{x_i} x_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\ + * 0 & \mathrm{if} \; x_i > 0 + * \end{array} \right. + * @f$. + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool channel_shared_; + Blob multiplier_; // dot multipler for backward computation of params + Blob bottom_memory_; // memory for in-place computation +}; + } // namespace caffe #endif // CAFFE_NEURON_LAYERS_HPP_ diff --git a/src/caffe/layers/prelu_layer.cpp b/src/caffe/layers/prelu_layer.cpp new file mode 100644 index 0000000..7119a27 --- /dev/null +++ b/src/caffe/layers/prelu_layer.cpp @@ -0,0 +1,140 @@ +#include +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void PReLULayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_GE(bottom[0]->num_axes(), 2) + << "Number of axes of bottom blob must be >=2."; + PReLUParameter prelu_param = this->layer_param().prelu_param(); + int channels = bottom[0]->channels(); + channel_shared_ = prelu_param.channel_shared(); + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + this->blobs_.resize(1); + if (channel_shared_) { + this->blobs_[0].reset(new Blob(vector(0))); + } else { + this->blobs_[0].reset(new Blob(vector(1, channels))); + } + shared_ptr > filler; + if (prelu_param.has_filler()) { + filler.reset(GetFiller(prelu_param.filler())); + } else { + FillerParameter filler_param; + filler_param.set_type("constant"); + filler_param.set_value(0.25); + filler.reset(GetFiller(filler_param)); + } + filler->Fill(this->blobs_[0].get()); + } + if (channel_shared_) { + CHECK_EQ(this->blobs_[0]->count(), 1) + << "Negative slope size is inconsistent with prototxt config"; + } else { + CHECK_EQ(this->blobs_[0]->count(), channels) + << "Negative slope size is inconsistent with prototxt config"; + } + + // Propagate gradients to the parameters (as directed by backward pass). + this->param_propagate_down_.resize(this->blobs_.size(), true); + multiplier_.Reshape(vector(1, bottom[0]->count() / bottom[0]->num())); + caffe_set(multiplier_.count(), Dtype(1), multiplier_.mutable_cpu_data()); +} + +template +void PReLULayer::Reshape(const vector*>& bottom, + const vector*>& top) { + CHECK_GE(bottom[0]->num_axes(), 2) + << "Number of axes of bottom blob must be >=2."; + top[0]->ReshapeLike(*bottom[0]); + if (bottom[0] == top[0]) { + // For in-place computation + bottom_memory_.ReshapeLike(*bottom[0]); + } +} + +template +void PReLULayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + const int count = bottom[0]->count(); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->channels(); + const Dtype* slope_data = this->blobs_[0]->cpu_data(); + + // For in-place computation + if (bottom[0] == top[0]) { + caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data()); + } + + // if channel_shared, channel index in the following computation becomes + // always zero. + const int div_factor = channel_shared_ ? channels : 1; + for (int i = 0; i < count; ++i) { + int c = (i / dim) % channels / div_factor; + top_data[i] = std::max(bottom_data[i], Dtype(0)) + + slope_data[c] * std::min(bottom_data[i], Dtype(0)); + } +} + +template +void PReLULayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* slope_data = this->blobs_[0]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + const int count = bottom[0]->count(); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->channels(); + + // For in-place computation + if (top[0] == bottom[0]) { + bottom_data = bottom_memory_.cpu_data(); + } + + // if channel_shared, channel index in the following computation becomes + // always zero. + const int div_factor = channel_shared_ ? channels : 1; + + // Propagte to param + // Since to write bottom diff will affect top diff if top and bottom blobs + // are identical (in-place computaion), we first compute param backward to + // keep top_diff unchanged. + if (this->param_propagate_down_[0]) { + Dtype* slope_diff = this->blobs_[0]->mutable_cpu_diff(); + caffe_set(this->blobs_[0]->count(), Dtype(0), slope_diff); + for (int i = 0; i < count; ++i) { + int c = (i / dim) % channels / div_factor; + slope_diff[c] += top_diff[i] * bottom_data[i] * (bottom_data[i] <= 0); + } + } + // Propagate to bottom + if (propagate_down[0]) { + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + for (int i = 0; i < count; ++i) { + int c = (i / dim) % channels / div_factor; + bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0) + + slope_data[c] * (bottom_data[i] <= 0)); + } + } +} + + +#ifdef CPU_ONLY +STUB_GPU(PReLULayer); +#endif + +INSTANTIATE_CLASS(PReLULayer); +REGISTER_LAYER_CLASS(PReLU); + +} // namespace caffe diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu new file mode 100644 index 0000000..fd0eda5 --- /dev/null +++ b/src/caffe/layers/prelu_layer.cu @@ -0,0 +1,130 @@ +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +// CUDA kernele for forward +template +__global__ void PReLUForward(const int n, const int channels, const int dim, + const Dtype* in, Dtype* out, const Dtype* slope_data, + const int div_factor) { + CUDA_KERNEL_LOOP(index, n) { + int c = (index / dim) % channels / div_factor; + out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c]; + } +} + +// CUDA kernel for bottom backward +template +__global__ void PReLUBackward(const int n, const int channels, const int dim, + const Dtype* in_diff, const Dtype* in_data, Dtype* out_diff, + const Dtype* slope_data, const int div_factor) { + CUDA_KERNEL_LOOP(index, n) { + int c = (index / dim) % channels / div_factor; + out_diff[index] = in_diff[index] * ((in_data[index] > 0) + + (in_data[index] <= 0) * slope_data[c]); + } +} + +// CUDA kernel for element-wise parameter backward +template +__global__ void PReLUParamBackward(const int n, const Dtype* in_diff, + const Dtype* in_data, Dtype* out_diff) { + CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_diff[index] * in_data[index] * (in_data[index] <= 0); + } +} + +template +void PReLULayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + const int count = bottom[0]->count(); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->channels(); + const Dtype* slope_data = this->blobs_[0]->gpu_data(); + const int div_factor = channel_shared_ ? channels : 1; + + // For in-place computation + if (top[0] == bottom[0]) { + caffe_copy(count, bottom_data, bottom_memory_.mutable_gpu_data()); + } + + // NOLINT_NEXT_LINE(whitespace/operators) + PReLUForward<<>>( + count, channels, dim, bottom_data, top_data, slope_data, div_factor); + CUDA_POST_KERNEL_CHECK; +} + +template +void PReLULayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); + const int count = bottom[0]->count(); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->channels(); + + // For in-place computation + if (top[0] == bottom[0]) { + bottom_data = bottom_memory_.gpu_data(); + } + + // Propagte to param + // Since to write bottom diff will affect top diff if top and bottom blobs + // are identical (in-place computaion), we first compute param backward to + // keep top_diff unchanged. + if (this->param_propagate_down_[0]) { + Dtype* slope_diff = this->blobs_[0]->mutable_gpu_diff(); + // slope_diff is set as 0, then accumulated over batches + caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), slope_diff); + int cdim = channels * dim; + Dtype dsum = 0.; + for (int n = 0; n < bottom[0]->num(); ++n) { + Dtype* temp_buff = multiplier_.mutable_gpu_diff(); + // compute element-wise diff + // NOLINT_NEXT_LINE(whitespace/operators) + PReLUParamBackward<<>>( + cdim, top_diff + top[0]->offset(n), + bottom_data + bottom[0]->offset(n), multiplier_.mutable_gpu_diff()); + CUDA_POST_KERNEL_CHECK; + if (channel_shared_) { + Dtype d; + caffe_gpu_dot(channels * dim, multiplier_.gpu_diff(), + multiplier_.gpu_data(), &d); + dsum += d; + } else { + caffe_gpu_gemv(CblasNoTrans, channels, dim, 1., + multiplier_.gpu_diff(), multiplier_.gpu_data(), 1., + slope_diff); + } + } + if (channel_shared_) { + caffe_gpu_set(this->blobs_[0]->count(), Dtype(dsum), slope_diff); + } + } + // Propagate to bottom + if (propagate_down[0]) { + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const Dtype* slope_data = this->blobs_[0]->gpu_data(); + int div_factor = channel_shared_ ? channels : 1; + // NOLINT_NEXT_LINE(whitespace/operators) + PReLUBackward<<>>( + count, channels, dim, top_diff, bottom_data, bottom_diff, slope_data, + div_factor); + CUDA_POST_KERNEL_CHECK; + } +} + + +INSTANTIATE_LAYER_GPU_FUNCS(PReLULayer); + + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index e523efa..888371d 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -259,7 +259,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 131 (last added: python_param) +// LayerParameter next available layer-specific ID: 132 (last added: prelu_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -323,6 +323,7 @@ message LayerParameter { optional MVNParameter mvn_param = 120; optional PoolingParameter pooling_param = 121; optional PowerParameter power_param = 122; + optional PReLUParameter prelu_param = 131; optional PythonParameter python_param = 130; optional ReLUParameter relu_param = 123; optional SigmoidParameter sigmoid_param = 124; @@ -946,3 +947,14 @@ message V0LayerParameter { optional HDF5OutputParameter hdf5_output_param = 1001; } + +// Message that stores parameters used by PReLULayer +message PReLUParameter { + // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: + // Surpassing Human-Level Performance on ImageNet Classification, 2015. + + // Initial value of a_i. Default is a_i=0.25 for all i. + optional FillerParameter filler = 1; + // Whether or not slope paramters are shared across channels. + optional bool channel_shared = 2 [default = false]; +} diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index ad10720..c9d52f2 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -99,6 +100,23 @@ class NeuronLayerTest : public MultiDeviceTest { GradientChecker checker(1e-2, 1e-3); checker.CheckGradientEltwise(&layer, blob_bottom_vec_, blob_top_vec_); } + + void TestPReLU(PReLULayer *layer) { + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + const Dtype* bottom_data = this->blob_bottom_->cpu_data(); + const Dtype* top_data = this->blob_top_->cpu_data(); + const Dtype* slope_data = layer->blobs()[0]->cpu_data(); + int hw = this->blob_bottom_->height() * this->blob_bottom_->width(); + int channels = this->blob_bottom_->channels(); + bool channel_shared = layer->layer_param().prelu_param().channel_shared(); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + int c = channel_shared ? 0 : (i / hw) % channels; + EXPECT_EQ(top_data[i], + std::max(bottom_data[i], (Dtype)(0)) + + slope_data[c] * std::min(bottom_data[i], (Dtype)(0))); + } + } }; TYPED_TEST_CASE(NeuronLayerTest, TestDtypesAndDevices); @@ -392,6 +410,184 @@ TYPED_TEST(NeuronLayerTest, TestBNLLGradient) { this->blob_top_vec_); } +TYPED_TEST(NeuronLayerTest, TestPReLUParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + PReLULayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* slopes = layer.blobs()[0]->cpu_data(); + int count = layer.blobs()[0]->count(); + for (int i = 0; i < count; ++i, ++slopes) { + EXPECT_EQ(*slopes, 0.25); + } +} + +TYPED_TEST(NeuronLayerTest, TestPReLUForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + PReLULayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(layer.blobs()[0].get()); + this->TestPReLU(&layer); +} + +TYPED_TEST(NeuronLayerTest, TestPReLUForwardChannelShared) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_prelu_param()->set_channel_shared(true); + PReLULayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + this->TestPReLU(&layer); +} + +TYPED_TEST(NeuronLayerTest, TestPReLUGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + PReLULayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(layer.blobs()[0].get()); + GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(NeuronLayerTest, TestPReLUGradientChannelShared) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_prelu_param()->set_channel_shared(true); + PReLULayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(NeuronLayerTest, TestPReLUConsistencyReLU) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter prelu_layer_param; + LayerParameter relu_layer_param; + relu_layer_param.mutable_relu_param()->set_negative_slope(0.25); + PReLULayer prelu(prelu_layer_param); + ReLULayer relu(relu_layer_param); + // Set up blobs + vector*> blob_bottom_vec_2; + vector*> blob_top_vec_2; + shared_ptr > blob_bottom_2(new Blob()); + shared_ptr > blob_top_2(new Blob()); + blob_bottom_vec_2.push_back(blob_bottom_2.get()); + blob_top_vec_2.push_back(blob_top_2.get()); + blob_bottom_2->CopyFrom(*this->blob_bottom_, false, true); + // SetUp layers + prelu.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + relu.SetUp(blob_bottom_vec_2, blob_top_vec_2); + // Check forward + prelu.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + relu.Forward(this->blob_bottom_vec_, blob_top_vec_2); + for (int s = 0; s < blob_top_2->count(); ++s) { + EXPECT_EQ(this->blob_top_->cpu_data()[s], blob_top_2->cpu_data()[s]); + } + // Check backward + shared_ptr > tmp_blob(new Blob()); + tmp_blob->ReshapeLike(*blob_top_2.get()); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(tmp_blob.get()); + caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(), + this->blob_top_->mutable_cpu_diff()); + caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(), + blob_top_2->mutable_cpu_diff()); + vector propagate_down; + propagate_down.push_back(true); + prelu.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + relu.Backward(blob_top_vec_2, propagate_down, blob_bottom_vec_2); + for (int s = 0; s < blob_bottom_2->count(); ++s) { + EXPECT_EQ(this->blob_bottom_->cpu_diff()[s], blob_bottom_2->cpu_diff()[s]); + } +} + +TYPED_TEST(NeuronLayerTest, TestPReLUInPlace) { + typedef typename TypeParam::Dtype Dtype; + // Set layer parameters + LayerParameter ip_layer_param; + LayerParameter prelu_layer_param; + InnerProductParameter *ip_param = + ip_layer_param.mutable_inner_product_param(); + ip_param->mutable_weight_filler()->set_type("gaussian"); + ip_param->set_num_output(3); + InnerProductLayer ip(ip_layer_param); + PReLULayer prelu(prelu_layer_param); + InnerProductLayer ip2(ip_layer_param); + PReLULayer prelu2(prelu_layer_param); + // Set up blobs + vector*> blob_bottom_vec_2; + vector*> blob_middle_vec_2; + vector*> blob_top_vec_2; + shared_ptr > blob_bottom_2(new Blob()); + shared_ptr > blob_middle_2(new Blob()); + shared_ptr > blob_top_2(new Blob()); + blob_bottom_vec_2.push_back(blob_bottom_2.get()); + blob_middle_vec_2.push_back(blob_middle_2.get()); + blob_top_vec_2.push_back(blob_top_2.get()); + blob_bottom_2->CopyFrom(*this->blob_bottom_, false, true); + // SetUp layers + ip.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + prelu.SetUp(this->blob_top_vec_, this->blob_top_vec_); + ip2.SetUp(blob_bottom_vec_2, blob_middle_vec_2); + prelu2.SetUp(blob_middle_vec_2, blob_top_vec_2); + caffe_copy(ip2.blobs()[0]->count(), ip.blobs()[0]->cpu_data(), + ip2.blobs()[0]->mutable_cpu_data()); + // Forward in-place + ip.Reshape(this->blob_bottom_vec_, this->blob_top_vec_); + ip.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + prelu.Reshape(this->blob_top_vec_, this->blob_top_vec_); + prelu.Forward(this->blob_top_vec_, this->blob_top_vec_); + // Forward non-in-place + ip2.Reshape(blob_bottom_vec_2, blob_middle_vec_2); + ip2.Forward(blob_bottom_vec_2, blob_middle_vec_2); + prelu2.Reshape(blob_middle_vec_2, blob_top_vec_2); + prelu2.Forward(blob_middle_vec_2, blob_top_vec_2); + // Check numbers + for (int s = 0; s < blob_top_2->count(); ++s) { + EXPECT_EQ(this->blob_top_->cpu_data()[s], blob_top_2->cpu_data()[s]); + } + // Fill top diff with random numbers + shared_ptr > tmp_blob(new Blob()); + tmp_blob->ReshapeLike(*blob_top_2.get()); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(tmp_blob.get()); + caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(), + this->blob_top_->mutable_cpu_diff()); + caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(), + blob_top_2->mutable_cpu_diff()); + // Backward in-place + vector propagate_down; + propagate_down.push_back(true); + prelu.Backward(this->blob_top_vec_, propagate_down, this->blob_top_vec_); + ip.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + // Backward non-in-place + prelu2.Backward(blob_top_vec_2, propagate_down, blob_middle_vec_2); + ip2.Backward(blob_middle_vec_2, propagate_down, blob_bottom_vec_2); + // Check numbers + for (int s = 0; s < blob_bottom_2->count(); ++s) { + EXPECT_EQ(this->blob_bottom_->cpu_diff()[s], blob_bottom_2->cpu_diff()[s]); + } + for (int s = 0; s < ip.blobs()[0]->count(); ++s) { + EXPECT_EQ(ip.blobs()[0]->cpu_diff()[s], ip2.blobs()[0]->cpu_diff()[s]); + } + for (int s = 0; s < ip.blobs()[1]->count(); ++s) { + EXPECT_EQ(ip.blobs()[1]->cpu_diff()[s], ip2.blobs()[1]->cpu_diff()[s]); + } + for (int s = 0; s < prelu.blobs()[0]->count(); ++s) { + EXPECT_EQ(prelu.blobs()[0]->cpu_diff()[s], + prelu2.blobs()[0]->cpu_diff()[s]); + } +} + #ifdef USE_CUDNN template class CuDNNNeuronLayerTest : public ::testing::Test { -- 2.7.4