const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
bool channel_shared_;
- Blob<Dtype> multiplier_; // dot multipler for backward computation of params
+ Blob<Dtype> multiplier_; // dot multiplier for backward computation of params
+ Blob<Dtype> backward_buff_; // temporary buffer for backward computation
Blob<Dtype> bottom_memory_; // memory for in-place computation
};
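For reference (not part of the patch): the quantity the new backward_buff_ stages is the element-wise PReLU slope gradient before any reduction. A minimal CPU sketch of what the PReLUParamBackward kernel writes into it, assuming hypothetical raw pointers top_diff, bottom_data, and buff, each of length channels * dim:

// PReLU(x) = x for x > 0 and a*x otherwise, so d/da contributes
// top_diff[i] * bottom_data[i] only where the input was non-positive.
for (int i = 0; i < channels * dim; ++i) {
  buff[i] = top_diff[i] * bottom_data[i] * (bottom_data[i] <= 0);
}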
// Propagate gradients to the parameters (as directed by backward pass).
this->param_propagate_down_.resize(this->blobs_.size(), true);
- multiplier_.Reshape(vector<int>(1, bottom[0]->count() / bottom[0]->num()));
+ multiplier_.Reshape(vector<int>(1, bottom[0]->count(1)));
+ backward_buff_.Reshape(vector<int>(1, bottom[0]->count(1)));
caffe_set(multiplier_.count(), Dtype(1), multiplier_.mutable_cpu_data());
}
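To make the reshape concrete (a hedged example with made-up dimensions, not code from the layer): count(1) counts every axis except num, so it equals the old count() / num() exactly, and both buffers hold one entry per element of a single input image.

// Hypothetical blob of shape (N=2, C=3, H=4, W=5): count(1) == 3*4*5 == 60.
vector<int> shape(4);
shape[0] = 2; shape[1] = 3; shape[2] = 4; shape[3] = 5;
Blob<Dtype> b;
b.Reshape(shape);
CHECK_EQ(b.count(1), b.count() / b.num());  // 60 == 120 / 2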
int cdim = channels * dim;
Dtype dsum = 0.;
for (int n = 0; n < bottom[0]->num(); ++n) {
- Dtype* temp_buff = multiplier_.mutable_gpu_diff();
// compute element-wise diff
// NOLINT_NEXT_LINE(whitespace/operators)
PReLUParamBackward<Dtype><<<CAFFE_GET_BLOCKS(cdim),
CAFFE_CUDA_NUM_THREADS>>>(
cdim, top_diff + top[0]->offset(n),
- bottom_data + bottom[0]->offset(n), multiplier_.mutable_gpu_diff());
+ bottom_data + bottom[0]->offset(n),
+ backward_buff_.mutable_gpu_diff());
CUDA_POST_KERNEL_CHECK;
if (channel_shared_) {
Dtype d;
- caffe_gpu_dot<Dtype>(channels * dim, multiplier_.gpu_diff(),
+ caffe_gpu_dot<Dtype>(channels * dim, backward_buff_.gpu_diff(),
multiplier_.gpu_data(), &d);
dsum += d;
} else {
caffe_gpu_gemv<Dtype>(CblasNoTrans, channels, dim, 1.,
- multiplier_.gpu_diff(), multiplier_.gpu_data(), 1.,
+ backward_buff_.gpu_diff(), multiplier_.gpu_data(), 1.,
slope_diff);
}
}
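Reading the two reductions back in scalar form (an illustrative sketch, not code from the layer): with backward_buff_'s diff viewed as a channels x dim matrix B and multiplier_ holding all ones, the gemv branch accumulates slope_diff[c] += the sum over d of B[c][d], while the channel-shared branch dots B against the same ones vector to collapse everything into the single scalar d.

// Equivalent CPU loops, assuming buff points at backward_buff_'s diff.
for (int c = 0; c < channels; ++c) {      // per-channel slopes
  for (int d = 0; d < dim; ++d) {
    slope_diff[c] += buff[c * dim + d];   // matches gemv with beta == 1
  }
}
// channel-shared case: dsum += dot(buff, ones), a sum over all cdim entries.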