Add "stable_prod_grad" option (on by default) to ELTWISE layer to
author Jeff Donahue <jeff.donahue@gmail.com>
Mon, 25 Aug 2014 23:10:22 +0000 (16:10 -0700)
committer Jeff Donahue <jeff.donahue@gmail.com>
Mon, 25 Aug 2014 23:19:48 +0000 (16:19 -0700)
compute the eltwise product gradient using a slower but stabler formula.

include/caffe/common_layers.hpp
src/caffe/layers/eltwise_layer.cpp
src/caffe/layers/eltwise_layer.cu
src/caffe/proto/caffe.proto
src/caffe/test/test_eltwise_layer.cpp

index ed47dbc..1b50e95 100644 (file)
@@ -115,6 +115,8 @@ class EltwiseLayer : public Layer<Dtype> {
 
   EltwiseParameter_EltwiseOp op_;
   vector<Dtype> coeffs_;
+
+  bool stable_prod_grad_;
 };
 
 /* FlattenLayer
index ec6a46f..56a1920 100644 (file)
@@ -35,6 +35,7 @@ void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       coeffs_[i] = this->layer_param().eltwise_param().coeff(i);
     }
   }
+  stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
 }
 
 template <typename Dtype>
@@ -73,7 +74,21 @@ void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
       Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
       switch (op_) {
       case EltwiseParameter_EltwiseOp_PROD:
-        caffe_div(count, top_data, bottom_data, bottom_diff);
+        if (stable_prod_grad_) {
+          bool initialized = false;
+          for (int j = 0; j < bottom->size(); ++j) {
+            if (i == j) { continue; }
+            if (!initialized) {
+              caffe_copy(count, (*bottom)[j]->cpu_data(), bottom_diff);
+              initialized = true;
+            } else {
+              caffe_mul(count, (*bottom)[j]->cpu_data(), bottom_diff,
+                        bottom_diff);
+            }
+          }
+        } else {
+          caffe_div(count, top_data, bottom_data, bottom_diff);
+        }
         caffe_mul(count, bottom_diff, top_diff, bottom_diff);
         break;
       case EltwiseParameter_EltwiseOp_SUM:
index 4b38949..e005cb9 100644 (file)
@@ -43,7 +43,21 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
       switch (op_) {
       case EltwiseParameter_EltwiseOp_PROD:
-        caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
+        if (stable_prod_grad_) {
+          bool initialized = false;
+          for (int j = 0; j < bottom->size(); ++j) {
+            if (i == j) { continue; }
+            if (!initialized) {
+              caffe_copy(count, (*bottom)[j]->gpu_data(), bottom_diff);
+              initialized = true;
+            } else {
+              caffe_gpu_mul(count, (*bottom)[j]->gpu_data(), bottom_diff,
+                            bottom_diff);
+            }
+          }
+        } else {
+          caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
+        }
         caffe_gpu_mul(count, bottom_diff, top_diff, bottom_diff);
         break;
       case EltwiseParameter_EltwiseOp_SUM:
index 9c08252..49a6e14 100644 (file)
@@ -420,6 +420,10 @@ message EltwiseParameter {
   }
   optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
   repeated float coeff = 2; // blob-wise coefficient for SUM operation
+
+  // Whether to use an asymptotically slower (for >2 inputs) but stabler method
+  // of computing the gradient for the PROD operation. (No effect for SUM op.)
+  optional bool stable_prod_grad = 3 [default = true];
 }
 
 // Message that stores parameters used by ThresholdLayer
index 4c17dfd..da5e353 100644 (file)
@@ -124,11 +124,24 @@ TYPED_TEST(EltwiseLayerTest, TestSumCoeff) {
   }
 }
 
-TYPED_TEST(EltwiseLayerTest, TestProdGradient) {
+TYPED_TEST(EltwiseLayerTest, TestStableProdGradient) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
   EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
   eltwise_param->set_operation(EltwiseParameter_EltwiseOp_PROD);
+  eltwise_param->set_stable_prod_grad(true);
+  EltwiseLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
+TYPED_TEST(EltwiseLayerTest, TestUnstableProdGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+  eltwise_param->set_operation(EltwiseParameter_EltwiseOp_PROD);
+  eltwise_param->set_stable_prod_grad(false);
   EltwiseLayer<Dtype> layer(layer_param);
   GradientChecker<Dtype> checker(1e-2, 1e-3);
   checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),