weight elementwise sum with per-blob coefficients
author: Evan Shelhamer <shelhamer@imaginarynumber.net>
Fri, 23 May 2014 03:12:11 +0000 (20:12 -0700)
committer: Evan Shelhamer <shelhamer@imaginarynumber.net>
Fri, 23 May 2014 03:27:46 +0000 (20:27 -0700)
include/caffe/vision_layers.hpp
src/caffe/layers/eltwise_layer.cpp
src/caffe/layers/eltwise_layer.cu
src/caffe/proto/caffe.proto
src/caffe/test/test_eltwise_layer.cpp

index 3acb8df..07ab577 100644 (file)
@@ -134,6 +134,7 @@ class EltwiseLayer : public Layer<Dtype> {
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
 
   EltwiseParameter_EltwiseOp op_;
+  vector<Dtype> coeffs_;
 };
 
 /* FlattenLayer
index 13cd34d..77eae30 100644 (file)
@@ -6,6 +6,8 @@
 #include "caffe/vision_layers.hpp"
 #include "caffe/util/math_functions.hpp"
 
+using std::fill;
+
 namespace caffe {
 
 template <typename Dtype>
@@ -15,6 +17,13 @@ void EltwiseLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
       "Eltwise Layer takes at least 2 blobs as input.";
   CHECK_EQ(top->size(), 1) <<
       "Eltwise Layer takes a single blob as output.";
+  CHECK(this->layer_param().eltwise_param().coeff_size() == 0
+      || this->layer_param().eltwise_param().coeff_size() == bottom.size()) <<
+      "Eltwise Layer takes one coefficient per bottom blob.";
+  CHECK(!(this->layer_param().eltwise_param().operation()
+      == EltwiseParameter_EltwiseOp_PROD
+      && this->layer_param().eltwise_param().coeff_size())) <<
+      "Eltwise layer only takes coefficients for summation.";
   const int num = bottom[0]->num();
   const int channels = bottom[0]->channels();
   const int height = bottom[0]->height();
@@ -27,6 +36,13 @@ void EltwiseLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   }
   (*top)[0]->Reshape(num, channels, height, width);
   op_ = this->layer_param_.eltwise_param().operation();
+  // Blob-wise coefficients for the elementwise operation.
+  coeffs_ = vector<Dtype>(bottom.size(), 1);
+  if (this->layer_param().eltwise_param().coeff_size()) {
+    for (int i = 0; i < bottom.size(); ++i) {
+      coeffs_[i] = this->layer_param().eltwise_param().coeff(i);
+    }
+  }
 }
 
 template <typename Dtype>
@@ -42,9 +58,10 @@ Dtype EltwiseLayer<Dtype>::Forward_cpu(
     }
     break;
   case EltwiseParameter_EltwiseOp_SUM:
-    caffe_add(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data);
-    for (int i = 2; i < bottom.size(); ++i) {
-      caffe_add(count, top_data, bottom[i]->cpu_data(), top_data);
+    caffe_set(count, Dtype(0), top_data);
+    // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
+    for (int i = 0; i < bottom.size(); ++i) {
+      caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
     }
     break;
   default:
@@ -69,7 +86,11 @@ void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
         caffe_mul(count, bottom_diff, top_diff, bottom_diff);
         break;
       case EltwiseParameter_EltwiseOp_SUM:
-        caffe_copy(count, top_diff, bottom_diff);
+        if (coeffs_[i] == Dtype(1)) {
+          caffe_copy(count, top_diff, bottom_diff);
+        } else {
+          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
+        }
         break;
       default:
         LOG(FATAL) << "Unknown elementwise operation.";
index 8bdb6a3..e1b63b1 100644 (file)
@@ -21,9 +21,10 @@ Dtype EltwiseLayer<Dtype>::Forward_gpu(
     }
     break;
   case EltwiseParameter_EltwiseOp_SUM:
-    caffe_gpu_add(count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), top_data);
-    for (int i = 2; i < bottom.size(); ++i) {
-      caffe_gpu_add(count, top_data, bottom[i]->gpu_data(), top_data);
+    caffe_gpu_set(count, Dtype(0.), top_data);
+    // TODO(shelhamer) does cuBLAS optimize to sum for coeff = 1?
+    for (int i = 0; i < bottom.size(); ++i) {
+      caffe_gpu_axpy(count, coeffs_[i], bottom[i]->gpu_data(), top_data);
     }
     break;
   default:
@@ -48,7 +49,11 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
         caffe_gpu_mul(count, bottom_diff, top_diff, bottom_diff);
         break;
       case EltwiseParameter_EltwiseOp_SUM:
-        caffe_gpu_copy(count, top_diff, bottom_diff);
+        if (coeffs_[i] == Dtype(1.)) {
+          caffe_gpu_copy(count, top_diff, bottom_diff);
+        } else {
+          caffe_gpu_scale(count, coeffs_[i], top_diff, bottom_diff);
+        }
         break;
       default:
         LOG(FATAL) << "Unknown elementwise operation.";
index da07af0..b792972 100644 (file)
@@ -258,6 +258,7 @@ message EltwiseParameter {
     SUM = 1;
   }
   optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
+  repeated float coeff = 2; // blob-wise coefficient
 }
 
 // Message that stores parameters used by HDF5DataLayer
index dbb77fc..5f72f62 100644 (file)
@@ -103,6 +103,29 @@ TYPED_TEST(EltwiseLayerTest, TestSumCPU) {
   }
 }
 
+TYPED_TEST(EltwiseLayerTest, TestSumCoeffCPU) {  // weighted SUM forward, CPU
+  Caffe::set_mode(Caffe::CPU);
+  LayerParameter layer_param;
+  EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+  eltwise_param->set_operation(EltwiseParameter_EltwiseOp_SUM);
+  eltwise_param->add_coeff(1);  // weight expected on blob a (see check)
+  eltwise_param->add_coeff(-0.5);  // weight expected on blob b (see check)
+  eltwise_param->add_coeff(2);  // weight expected on blob c (see check)
+  shared_ptr<EltwiseLayer<TypeParam> > layer(
+      new EltwiseLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  const TypeParam* data = this->blob_top_->cpu_data();  // layer output
+  const int count = this->blob_top_->count();
+  const TypeParam* in_data_a = this->blob_bottom_a_->cpu_data();
+  const TypeParam* in_data_b = this->blob_bottom_b_->cpu_data();
+  const TypeParam* in_data_c = this->blob_bottom_c_->cpu_data();
+  for (int i = 0; i < count; ++i) {  // check every output element
+    EXPECT_NEAR(data[i], in_data_a[i] - 0.5*in_data_b[i] + 2*in_data_c[i],
+        1e-4);  // absolute tolerance
+  }
+}
+
 TYPED_TEST(EltwiseLayerTest, TestProdGPU) {
   Caffe::set_mode(Caffe::GPU);
   LayerParameter layer_param;
@@ -141,6 +164,29 @@ TYPED_TEST(EltwiseLayerTest, TestSumGPU) {
   }
 }
 
+TYPED_TEST(EltwiseLayerTest, TestSumCoeffGPU) {  // weighted SUM forward, GPU
+  Caffe::set_mode(Caffe::GPU);
+  LayerParameter layer_param;
+  EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+  eltwise_param->set_operation(EltwiseParameter_EltwiseOp_SUM);
+  eltwise_param->add_coeff(1);  // weight expected on blob a (see check)
+  eltwise_param->add_coeff(-0.5);  // weight expected on blob b (see check)
+  eltwise_param->add_coeff(2);  // weight expected on blob c (see check)
+  shared_ptr<EltwiseLayer<TypeParam> > layer(
+      new EltwiseLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  const TypeParam* data = this->blob_top_->cpu_data();  // read via cpu_data()
+  const int count = this->blob_top_->count();
+  const TypeParam* in_data_a = this->blob_bottom_a_->cpu_data();
+  const TypeParam* in_data_b = this->blob_bottom_b_->cpu_data();
+  const TypeParam* in_data_c = this->blob_bottom_c_->cpu_data();
+  for (int i = 0; i < count; ++i) {  // check every output element
+    EXPECT_NEAR(data[i], in_data_a[i] - 0.5*in_data_b[i] + 2*in_data_c[i],
+        1e-4);  // absolute tolerance
+  }
+}
+
 TYPED_TEST(EltwiseLayerTest, TestProdCPUGradient) {
   Caffe::set_mode(Caffe::CPU);
   LayerParameter layer_param;
@@ -163,6 +209,20 @@ TYPED_TEST(EltwiseLayerTest, TestSumCPUGradient) {
       &(this->blob_top_vec_));
 }
 
+TYPED_TEST(EltwiseLayerTest, TestSumCoeffCPUGradient) {
+  Caffe::set_mode(Caffe::CPU);  // gradient check of weighted SUM in CPU mode
+  LayerParameter layer_param;
+  EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+  eltwise_param->set_operation(EltwiseParameter_EltwiseOp_SUM);
+  eltwise_param->add_coeff(1);  // same coefficients as TestSumCoeffCPU
+  eltwise_param->add_coeff(-0.5);
+  eltwise_param->add_coeff(2);
+  EltwiseLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-3);  // step, threshold? verify
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
 TYPED_TEST(EltwiseLayerTest, TestSumGPUGradient) {
   Caffe::set_mode(Caffe::GPU);
   LayerParameter layer_param;
@@ -174,4 +234,18 @@ TYPED_TEST(EltwiseLayerTest, TestSumGPUGradient) {
       &(this->blob_top_vec_));
 }
 
+TYPED_TEST(EltwiseLayerTest, TestSumCoeffGPUGradient) {
+  Caffe::set_mode(Caffe::GPU);  // gradient check of weighted SUM in GPU mode
+  LayerParameter layer_param;
+  EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+  eltwise_param->set_operation(EltwiseParameter_EltwiseOp_SUM);
+  eltwise_param->add_coeff(1);  // same coefficients as TestSumCoeffGPU
+  eltwise_param->add_coeff(-0.5);
+  eltwise_param->add_coeff(2);
+  EltwiseLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-3);  // step, threshold? verify
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
 }  // namespace caffe