ReLU6 layer ocl support

author Li Peng <peng.li@intel.com>

Fri, 16 Feb 2018 07:37:59 +0000 (15:37 +0800)

committer Li Peng <peng.li@intel.com>

Tue, 20 Feb 2018 07:11:09 +0000 (15:11 +0800)
author Li Peng <peng.li@intel.com>
Fri, 16 Feb 2018 07:37:59 +0000 (15:37 +0800)
committer Li Peng <peng.li@intel.com>
Tue, 20 Feb 2018 07:11:09 +0000 (15:11 +0800)
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp

index 4219108..34f5616 100644 (file)
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -406,6 +406,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      class CV_EXPORTS ReLU6Layer : public ActivationLayer
      {
      public:
+        float minValue, maxValue;
+
          static Ptr<ReLU6Layer> create(const LayerParams &params);
      };
  
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp

index 31ae173..4b2b0a0 100644 (file)
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1439,6 +1439,7 @@ struct Net::Impl
                           nextData &&
                          ((nextData->type == "ReLU") ||
                           (nextData->type == "ChannelsPReLU") ||
+                         (nextData->type == "ReLU6") ||
                           (nextData->type == "TanH") ||
                           (nextData->type == "Power"))) )
                  {
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp

index 51a28d9..2517499 100644 (file)
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -860,6 +860,15 @@ public:
                  activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
              }
  
+            Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
+            if( !activ_relu6.empty() )
+            {
+                reluslope.resize(2);
+                reluslope[0] = activ_relu6->minValue;
+                reluslope[1] = activ_relu6->maxValue;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+            }
+
              Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
              if( !activ_chprelu.empty() )
              {
@@ -906,12 +915,17 @@ public:
              {
                  convolutionOp->setActivTanh(true);
              }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
+            {
+                convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
+            }
              else
              {
                  convolutionOp->setActivReLU(false, 0);
                  convolutionOp->setActivPReLU(false, reluslope);
                  convolutionOp->setActivPower(false, 1.f);
                  convolutionOp->setActivTanh(false);
+                convolutionOp->setActivReLU6(false, 0, 0);
              }
              newActiv = false;
          }
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp

index dbbe6ca..adf51c8 100644 (file)
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -381,8 +381,30 @@ struct ReLU6Functor
  #ifdef HAVE_OPENCL
      bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
      {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(3, (float)minValue);
+            kernel.set(4, (float)maxValue);
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
      }
  #endif
  
@@ -867,6 +889,9 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
      float maxValue = params.get<float>("max_value", 6.0f);
      Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
      l->setParamsFrom(params);
+    l->minValue = minValue;
+    l->maxValue = maxValue;
+
      return l;
  }
  
diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp

index b536ce4..93ac1a2 100644 (file)
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -78,7 +78,8 @@ typedef enum {
      OCL4DNN_CONV_FUSED_ACTIV_RELU                 = 1,
      OCL4DNN_CONV_FUSED_ACTIV_PRELU                = 2,
      OCL4DNN_CONV_FUSED_ACTIV_POWER                = 3,
-    OCL4DNN_CONV_FUSED_ACTIV_TANH                 = 4
+    OCL4DNN_CONV_FUSED_ACTIV_TANH                 = 4,
+    OCL4DNN_CONV_FUSED_ACTIV_RELU6                = 5
  } ocl4dnnFusedActiv_t;
  
  template<typename Dtype>
@@ -96,6 +97,7 @@ class OCL4DNNConvSpatial
          void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
          void setActivPower(bool fuse_activ, float power);
          void setActivTanh(bool fuse_activ);
+        void setActivReLU6(bool fuse_activ, float min, float max);
          void setBias(bool bias_term);
  
      private:
@@ -319,6 +321,8 @@ class OCL4DNNConvSpatial
          cv::ocl::ProgramSource src_;
          int32_t prev_kernel_type_;
          float negative_slope_;
+        float min_value_;
+        float max_value_;
          UMat negative_slope_umat_;
          ocl4dnnFusedActiv_t fused_activ_;
          float power_;
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp

index 16bea92..84ea191 100644 (file)
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -82,6 +82,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
      fused_eltwise_ = false;
      power_ = 1.f;
      negative_slope_ = 0;
+    min_value_ = 0;
+    max_value_ = 0;
      prev_kernel_type_ = -1;
      tuned_ = false;
  
@@ -162,6 +164,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
          case OCL4DNN_CONV_FUSED_ACTIV_TANH:
              addDef("FUSED_CONV_TANH", 1);
              break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            addDef("FUSED_CONV_RELU6", 1);
+            break;
          default:
              ;
      }
@@ -184,6 +189,10 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bo
          case OCL4DNN_CONV_FUSED_ACTIV_POWER:
              kernel.set(argIdx++, (float)power_);
              break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            kernel.set(argIdx++, (float)min_value_);
+            kernel.set(argIdx++, (float)max_value_);
+            break;
          default:
              ;
      }
@@ -394,6 +403,19 @@ void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
  }
  
  template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+        min_value_ = min;
+        max_value_ = max;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
+template<typename Dtype>
  void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
  {
      if ( fuse_activ )
diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl

index df58a51..ab2532e 100644 (file)
--- a/modules/dnn/src/opencl/activations.cl
+++ b/modules/dnn/src/opencl/activations.cl
@@ -54,6 +54,17 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
  #endif
  }
  
+__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
+                           const T minValue, const T maxValue)
+{
+  int index = get_global_id(0);
+  if(index < count)
+  {
+    T x = in[index];
+    out[index] = clamp(x, minValue, maxValue);
+  }
+}
+
  __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                             __global const T* in, __global T* out, __global const T* slope_data)
  {
diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl

index e31d173..8f6e5a3 100644 (file)
--- a/modules/dnn/src/opencl/conv_layer_spatial.cl
+++ b/modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -48,19 +48,22 @@
  
  #if defined(FUSED_CONV_RELU)
  #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
-#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
+#define FUSED_ARG Dtype negative_slope,
  #elif defined(FUSED_CONV_PRELU)
  #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
-#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#define FUSED_ARG __global const Dtype *negative_slope,
  #elif defined(FUSED_CONV_POWER)
  #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
-#define NEGATIVE_SLOPE_ARG Dtype power,
+#define FUSED_ARG Dtype power,
  #elif defined(FUSED_CONV_TANH)
  #define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
+#elif defined(FUSED_CONV_RELU6)
+#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
+#define FUSED_ARG Dtype min_value, Dtype max_value,
  #else
  #define ACTIVATION_RELU_FUNCTION(x, c) (x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
  #endif
  
  #ifdef FUSED_CONV_ELTWISE
@@ -108,7 +111,7 @@
  
  __kernel void ConvolveBasic(
      ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
      __global Dtype* image_data,
      int image_offset,
      __global Dtype* kernel_data,
@@ -197,7 +200,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
  __kernel void
  convolve_simd(
      ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
      __global Dtype* inputs_base,
      filter_qualifier Dtype* weights_base,
      BIAS_KERNEL_ARG
@@ -417,7 +420,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
  
  #define GEMM_LIKE_KERNEL_ARGS     \
      ELTWISE_DATA_ARG              \
-    NEGATIVE_SLOPE_ARG            \
+    FUSED_ARG                     \
      const __global Dtype *src0,   \
      const __global Dtype *src1,   \
      BIAS_KERNEL_ARG               \
@@ -1731,7 +1734,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
  
  __kernel void DWCONV(
      ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
      __global Dtype* image_data,
      __global Dtype* kernel_data,
      BIAS_KERNEL_ARG
author	Li Peng <peng.li@intel.com>
	Fri, 16 Feb 2018 07:37:59 +0000 (15:37 +0800)
committer	Li Peng <peng.li@intel.com>
	Tue, 20 Feb 2018 07:11:09 +0000 (15:11 +0800)
modules/dnn/include/opencv2/dnn/all_layers.hpp		patch \| blob \| history
modules/dnn/src/dnn.cpp		patch \| blob \| history
modules/dnn/src/layers/convolution_layer.cpp		patch \| blob \| history
modules/dnn/src/layers/elementwise_layers.cpp		patch \| blob \| history
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp		patch \| blob \| history
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp		patch \| blob \| history
modules/dnn/src/opencl/activations.cl		patch \| blob \| history
modules/dnn/src/opencl/conv_layer_spatial.cl		patch \| blob \| history