compute the eltwise product gradient using a slower but more numerically stable formula.
EltwiseParameter_EltwiseOp op_;
vector<Dtype> coeffs_;
+
+ bool stable_prod_grad_;
};
/* FlattenLayer
coeffs_[i] = this->layer_param().eltwise_param().coeff(i);
}
}
+ stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
}
template <typename Dtype>
Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
switch (op_) {
case EltwiseParameter_EltwiseOp_PROD:
- caffe_div(count, top_data, bottom_data, bottom_diff);
+ if (stable_prod_grad_) {
+ bool initialized = false;
+ for (int j = 0; j < bottom->size(); ++j) {
+ if (i == j) { continue; }
+ if (!initialized) {
+ caffe_copy(count, (*bottom)[j]->cpu_data(), bottom_diff);
+ initialized = true;
+ } else {
+ caffe_mul(count, (*bottom)[j]->cpu_data(), bottom_diff,
+ bottom_diff);
+ }
+ }
+ } else {
+ caffe_div(count, top_data, bottom_data, bottom_diff);
+ }
caffe_mul(count, bottom_diff, top_diff, bottom_diff);
break;
case EltwiseParameter_EltwiseOp_SUM:
Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
switch (op_) {
case EltwiseParameter_EltwiseOp_PROD:
- caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
+ if (stable_prod_grad_) {
+ bool initialized = false;
+ for (int j = 0; j < bottom->size(); ++j) {
+ if (i == j) { continue; }
+ if (!initialized) {
+ caffe_copy(count, (*bottom)[j]->gpu_data(), bottom_diff);
+ initialized = true;
+ } else {
+ caffe_gpu_mul(count, (*bottom)[j]->gpu_data(), bottom_diff,
+ bottom_diff);
+ }
+ }
+ } else {
+ caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
+ }
caffe_gpu_mul(count, bottom_diff, top_diff, bottom_diff);
break;
case EltwiseParameter_EltwiseOp_SUM:
}
optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
repeated float coeff = 2; // blob-wise coefficient for SUM operation
+
+ // Whether to use an asymptotically slower (for >2 inputs) but more
+ // numerically stable method of computing the gradient for the PROD
+ // operation. (No effect for the SUM operation.)
+ optional bool stable_prod_grad = 3 [default = true];
}
// Message that stores parameters used by ThresholdLayer
}
}
-TYPED_TEST(EltwiseLayerTest, TestProdGradient) {
+TYPED_TEST(EltwiseLayerTest, TestStableProdGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
eltwise_param->set_operation(EltwiseParameter_EltwiseOp_PROD);
+ eltwise_param->set_stable_prod_grad(true);
+ EltwiseLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
+ &(this->blob_top_vec_));
+}
+
+TYPED_TEST(EltwiseLayerTest, TestUnstableProdGradient) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+ eltwise_param->set_operation(EltwiseParameter_EltwiseOp_PROD);
+ eltwise_param->set_stable_prod_grad(false);
EltwiseLayer<Dtype> layer(layer_param);
GradientChecker<Dtype> checker(1e-2, 1e-3);
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),