From a3dcca2a2cc7bd891d13c2303372286e55596fd4 Mon Sep 17 00:00:00 2001
From: Evan Shelhamer
Date: Mon, 1 Sep 2014 17:15:33 -0700
Subject: [PATCH] add engine parameter for multiple computational strategies

add `engine` switch to layers for selecting a computational backend
when there is a choice. Currently the standard Caffe implementation is
the only backend.
---
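Notes for review (commentary only, not part of the commit):

The backend is chosen per layer through the new `engine` field. Below is an
illustrative layer definition in this tree's prototxt syntax; the layer name
and shapes are invented for the example. Leaving `engine` unset keeps
DEFAULT, which the layers resolve to CAFFE when built with ENGINE := caffe.

    layers {
      name: "conv1"
      type: CONVOLUTION
      bottom: "data"
      top: "conv1"
      convolution_param {
        num_output: 96
        kernel_size: 11
        stride: 4
        engine: CAFFE  # explicit backend choice; omit to keep DEFAULT
      }
    }
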
 Makefile                           |  5 +++++
 Makefile.config.example            |  3 +++
 src/caffe/layers/conv_layer.cpp    |  9 +++++++++
 src/caffe/layers/pooling_layer.cpp |  8 ++++++++
 src/caffe/layers/relu_layer.cpp    | 11 ++++++++++-
 src/caffe/layers/sigmoid_layer.cpp |  9 +++++++++
 src/caffe/layers/softmax_layer.cpp |  9 +++++++++
 src/caffe/layers/tanh_layer.cpp    |  9 +++++++++
 src/caffe/proto/caffe.proto        | 54 ++++++++++++++++++++++++++++++++++++--
 9 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 546de93..89f7347 100644
--- a/Makefile
+++ b/Makefile
@@ -257,6 +257,11 @@ else
 	COMMON_FLAGS += -DNDEBUG -O2
 endif
 
+# Computation engine configuration
+ifeq ($(ENGINE), caffe)
+	COMMON_FLAGS += -DCAFFE_ENGINE
+endif
+
 # CPU-only configuration
 ifeq ($(CPU_ONLY), 1)
 	OBJS := $(PROTO_OBJS) $(CXX_OBJS)
diff --git a/Makefile.config.example b/Makefile.config.example
index 7c96d8a..12c4447 100644
--- a/Makefile.config.example
+++ b/Makefile.config.example
@@ -1,6 +1,9 @@
 ## Refer to http://caffe.berkeleyvision.org/installation.html
 # Contributions simplifying and improving our build system are welcome!
 
+# Computation engine switch: currently only the standard Caffe engine.
+ENGINE := caffe
+
 # CPU-only switch (uncomment to build without GPU support).
 # CPU_ONLY := 1
 
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 1a1248f..43ec6de 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -12,6 +12,7 @@ template <typename Dtype>
 void ConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
   ConvolutionParameter conv_param = this->layer_param_.convolution_param();
+  // Check configuration.
   CHECK(!conv_param.has_kernel_size() !=
       !(conv_param.has_kernel_h() && conv_param.has_kernel_w()))
       << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
@@ -112,6 +113,14 @@ void ConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     caffe_set(N_, Dtype(1), bias_multiplier_.mutable_cpu_data());
   }
   this->param_propagate_down_.resize(this->blobs_.size(), true);
+  // Default computation engine. Set it on the stored layer parameter,
+  // not on the local copy, so that the choice persists.
+#ifdef CAFFE_ENGINE
+  if (conv_param.engine() == ConvolutionParameter_Engine_DEFAULT) {
+    this->layer_param_.mutable_convolution_param()->set_engine(
+        ConvolutionParameter_Engine_CAFFE);
+  }
+#endif
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp
index 9e77fa2..fd13b1d 100644
--- a/src/caffe/layers/pooling_layer.cpp
+++ b/src/caffe/layers/pooling_layer.cpp
@@ -96,6 +96,14 @@ void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
         pooled_width_);
   }
+  // Default computation engine. Set it on the stored layer parameter,
+  // not on the local copy, so that the choice persists.
+#ifdef CAFFE_ENGINE
+  if (pool_param.engine() == PoolingParameter_Engine_DEFAULT) {
+    this->layer_param_.mutable_pooling_param()->set_engine(
+        PoolingParameter_Engine_CAFFE);
+  }
+#endif
 }
 
 // TODO(Yangqing): Is there a faster way to do pooling in the channel-first
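[Review note, not part of the diff: with a single backend the engine field
is only recorded, but the point of the switch is dispatch. A minimal sketch
of how a layer could branch on the field once a second engine exists; the
forward_caffe_cpu() helper is hypothetical and nothing in this patch
defines it.]

    template <typename Dtype>
    void ConvolutionLayer<Dtype>::Forward_cpu(
        const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
      // Dispatch on the per-layer engine; CAFFE is the only value the
      // patch defines, so the default branch guards future backends.
      switch (this->layer_param_.convolution_param().engine()) {
      case ConvolutionParameter_Engine_CAFFE:
        forward_caffe_cpu(bottom, top);  // hypothetical: the existing CPU path
        break;
      default:
        LOG(FATAL) << "Unknown convolution engine.";
      }
    }
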
diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp
index b50352f..7b41f7a 100644
--- a/src/caffe/layers/relu_layer.cpp
+++ b/src/caffe/layers/relu_layer.cpp
@@ -9,14 +9,23 @@ namespace caffe {
 template <typename Dtype>
 void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
+  ReLUParameter relu_param = this->layer_param_.relu_param();
   const Dtype* bottom_data = bottom[0]->cpu_data();
   Dtype* top_data = (*top)[0]->mutable_cpu_data();
   const int count = bottom[0]->count();
-  Dtype negative_slope = this->layer_param_.relu_param().negative_slope();
+  Dtype negative_slope = relu_param.negative_slope();
   for (int i = 0; i < count; ++i) {
     top_data[i] = std::max(bottom_data[i], Dtype(0))
         + negative_slope * std::min(bottom_data[i], Dtype(0));
   }
+  // Default computation engine. Set it on the stored layer parameter,
+  // not on the local copy, so that the choice persists.
+#ifdef CAFFE_ENGINE
+  if (relu_param.engine() == ReLUParameter_Engine_DEFAULT) {
+    this->layer_param_.mutable_relu_param()->set_engine(
+        ReLUParameter_Engine_CAFFE);
+  }
+#endif
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp
index d7bba7f..e045b7e 100644
--- a/src/caffe/layers/sigmoid_layer.cpp
+++ b/src/caffe/layers/sigmoid_layer.cpp
@@ -15,12 +15,21 @@ inline Dtype sigmoid(Dtype x) {
 template <typename Dtype>
 void SigmoidLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
+  SigmoidParameter sigmoid_param = this->layer_param_.sigmoid_param();
   const Dtype* bottom_data = bottom[0]->cpu_data();
   Dtype* top_data = (*top)[0]->mutable_cpu_data();
   const int count = bottom[0]->count();
   for (int i = 0; i < count; ++i) {
     top_data[i] = sigmoid(bottom_data[i]);
   }
+  // Default computation engine. Set it on the stored layer parameter,
+  // not on the local copy, so that the choice persists.
+#ifdef CAFFE_ENGINE
+  if (sigmoid_param.engine() == SigmoidParameter_Engine_DEFAULT) {
+    this->layer_param_.mutable_sigmoid_param()->set_engine(
+        SigmoidParameter_Engine_CAFFE);
+  }
+#endif
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp
index 29767ac..bfc4116 100644
--- a/src/caffe/layers/softmax_layer.cpp
+++ b/src/caffe/layers/softmax_layer.cpp
@@ -11,6 +11,7 @@ namespace caffe {
 template <typename Dtype>
 void SoftmaxLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
+  SoftmaxParameter softmax_param = this->layer_param_.softmax_param();
   (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
       bottom[0]->height(), bottom[0]->width());
   sum_multiplier_.Reshape(1, bottom[0]->channels(), 1, 1);
@@ -19,6 +20,14 @@ void SoftmaxLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     multiplier_data[i] = 1.;
   }
   scale_.Reshape(bottom[0]->num(), 1, bottom[0]->height(), bottom[0]->width());
+  // Default computation engine. Set it on the stored layer parameter,
+  // not on the local copy, so that the choice persists.
+#ifdef CAFFE_ENGINE
+  if (softmax_param.engine() == SoftmaxParameter_Engine_DEFAULT) {
+    this->layer_param_.mutable_softmax_param()->set_engine(
+        SoftmaxParameter_Engine_CAFFE);
+  }
+#endif
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/tanh_layer.cpp b/src/caffe/layers/tanh_layer.cpp
index 8dae005..1ea0073 100644
--- a/src/caffe/layers/tanh_layer.cpp
+++ b/src/caffe/layers/tanh_layer.cpp
@@ -12,6 +12,7 @@ namespace caffe {
 template <typename Dtype>
 void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
+  TanHParameter tanh_param = this->layer_param_.tanh_param();
   const Dtype* bottom_data = bottom[0]->cpu_data();
   Dtype* top_data = (*top)[0]->mutable_cpu_data();
   Dtype exp2x;
@@ -20,6 +21,14 @@ void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     exp2x = exp(2 * bottom_data[i]);
     top_data[i] = (exp2x - Dtype(1)) / (exp2x + Dtype(1));
   }
+  // Default computation engine. Set it on the stored layer parameter,
+  // not on the local copy, so that the choice persists.
+#ifdef CAFFE_ENGINE
+  if (tanh_param.engine() == TanHParameter_Engine_DEFAULT) {
+    this->layer_param_.mutable_tanh_param()->set_engine(
+        TanHParameter_Engine_CAFFE);
+  }
+#endif
 }
 
 template <typename Dtype>
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 38db12b..082feb8 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -198,7 +198,7 @@ message NetStateRule {
 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available ID: 37 (last added: transform_param)
+// LayerParameter next available ID: 40 (last added: softmax_param)
 message LayerParameter {
   repeated string bottom = 2; // the name of the bottom blobs
   repeated string top = 3; // the name of the top blobs
@@ -309,13 +309,21 @@ message LayerParameter {
   optional PoolingParameter pooling_param = 19;
   optional PowerParameter power_param = 21;
   optional ReLUParameter relu_param = 30;
+  optional SigmoidParameter sigmoid_param = 38;
+  optional SoftmaxParameter softmax_param = 39;
   optional SliceParameter slice_param = 31;
+  optional TanHParameter tanh_param = 37;
   optional ThresholdParameter threshold_param = 25;
   optional WindowDataParameter window_data_param = 20;
-
+
   // Parameters for data pre-processing.
   optional TransformationParameter transform_param = 36;
 
+  // Note: certain layers may have more than one computational engine
+  // for their implementation. These layers include an Engine type and
+  // engine parameter for selecting the implementation.
+  // The default for the engine is set by the ENGINE switch at compile-time.
+
   // DEPRECATED: The layer parameters specified as a V0LayerParameter.
   // This should never be used by any code except to upgrade to the new
   // LayerParameter specification.
@@ -377,6 +385,11 @@ message ConvolutionParameter {
   optional uint32 stride_w = 14; // The stride width
   optional FillerParameter weight_filler = 7; // The filler for the weight
   optional FillerParameter bias_filler = 8; // The filler for the bias
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+  }
+  optional Engine engine = 15 [default = DEFAULT];
 }
 
 // Message that stores parameters used by DataLayer
@@ -563,6 +576,11 @@ message PoolingParameter {
   optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
   optional uint32 stride_h = 7; // The stride height
   optional uint32 stride_w = 8; // The stride width
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+  }
+  optional Engine engine = 11 [default = DEFAULT];
 }
 
 // Message that stores parameters used by PowerLayer
@@ -581,6 +599,20 @@ message ReLUParameter {
   // improve neural network acoustic models. In ICML Workshop on Deep Learning
   // for Audio, Speech, and Language Processing.
   optional float negative_slope = 1 [default = 0];
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+  }
+  optional Engine engine = 2 [default = DEFAULT];
+}
+
+// Message that stores parameters used by SigmoidLayer
+message SigmoidParameter {
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+  }
+  optional Engine engine = 1 [default = DEFAULT];
 }
 
 // Message that stores parameters used by SliceLayer
@@ -593,6 +625,24 @@ message SliceParameter {
   repeated uint32 slice_point = 2;
 }
 
+// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
+message SoftmaxParameter {
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+  }
+  optional Engine engine = 1 [default = DEFAULT];
+}
+
+// Message that stores parameters used by TanHLayer
+message TanHParameter {
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+  }
+  optional Engine engine = 1 [default = DEFAULT];
+}
+
 // Message that stores parameters used by WindowDataLayer
 message WindowDataParameter {
   // Specify the data source.
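[Review note, not part of the diff: growing the set of backends later only
means appending a value to each Engine enum and handling it in the Makefile
ENGINE switch; existing prototxts keep working since DEFAULT = 0 is
unchanged. CUDNN below is purely illustrative, nothing in this patch
defines it.]

    enum Engine {
      DEFAULT = 0;
      CAFFE = 1;
      CUDNN = 2;  // illustrative future backend
    }
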
-- 
2.7.4