From 2863f950d634ed456506362669355aa202df8508 Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Fri, 16 Feb 2018 15:37:59 +0800
Subject: [PATCH] ReLU6 layer ocl support

include relu6 ocl kernel and layer fusion support

Signed-off-by: Li Peng
---
 modules/dnn/include/opencv2/dnn/all_layers.hpp     |  2 ++
 modules/dnn/src/dnn.cpp                            |  1 +
 modules/dnn/src/layers/convolution_layer.cpp       | 14 +++++++++++
 modules/dnn/src/layers/elementwise_layers.cpp      | 29 ++++++++++++++++++++--
 modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp        |  6 ++++-
 .../dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp   | 22 ++++++++++++++++
 modules/dnn/src/opencl/activations.cl              | 11 ++++++++
 modules/dnn/src/opencl/conv_layer_spatial.cl       | 21 +++++++++-------
 8 files changed, 94 insertions(+), 12 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 4219108..34f5616 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -406,6 +406,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS ReLU6Layer : public ActivationLayer
     {
     public:
+        float minValue, maxValue;
+
         static Ptr<ReLU6Layer> create(const LayerParams &params);
     };
 
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 31ae173..4b2b0a0 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1439,6 +1439,7 @@ struct Net::Impl
                           nextData &&
                           ((nextData->type == "ReLU") ||
                            (nextData->type == "ChannelsPReLU") ||
+                           (nextData->type == "ReLU6") ||
                            (nextData->type == "TanH") ||
                            (nextData->type == "Power"))) )
                 {
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 51a28d9..2517499 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -860,6 +860,15 @@ public:
                 activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
             }
 
+            Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
+            if( !activ_relu6.empty() )
+            {
+                reluslope.resize(2);
+                reluslope[0] = activ_relu6->minValue;
+                reluslope[1] = activ_relu6->maxValue;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+            }
+
             Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
             if( !activ_chprelu.empty() )
             {
@@ -906,12 +915,17 @@ public:
                 {
                     convolutionOp->setActivTanh(true);
                 }
+                else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
+                {
+                    convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
+                }
                 else
                 {
                     convolutionOp->setActivReLU(false, 0);
                     convolutionOp->setActivPReLU(false, reluslope);
                     convolutionOp->setActivPower(false, 1.f);
                     convolutionOp->setActivTanh(false);
+                    convolutionOp->setActivReLU6(false, 0, 0);
                 }
                 newActiv = false;
             }
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index dbbe6ca..adf51c8 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -381,8 +381,30 @@ struct ReLU6Functor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(3, (float)minValue);
+            kernel.set(4, (float)maxValue);
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif
 
@@ -867,6 +889,9 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
     float maxValue = params.get<float>("max_value", 6.0f);
     Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
     l->setParamsFrom(params);
+    l->minValue = minValue;
+    l->maxValue = maxValue;
+
     return l;
 }
 
diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
index b536ce4..93ac1a2 100644
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -78,7 +78,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
     OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
-    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4,
+    OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
 } ocl4dnnFusedActiv_t;
 
 template<typename Dtype>
@@ -96,6 +97,7 @@ class OCL4DNNConvSpatial
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
     void setActivPower(bool fuse_activ, float power);
     void setActivTanh(bool fuse_activ);
+    void setActivReLU6(bool fuse_activ, float min, float max);
     void setBias(bool bias_term);
 
 private:
@@ -319,6 +321,8 @@ class OCL4DNNConvSpatial
     cv::ocl::ProgramSource src_;
     int32_t prev_kernel_type_;
     float negative_slope_;
+    float min_value_;
+    float max_value_;
     UMat negative_slope_umat_;
     ocl4dnnFusedActiv_t fused_activ_;
     float power_;
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
index 16bea92..84ea191 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -82,6 +82,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     fused_eltwise_ = false;
     power_ = 1.f;
     negative_slope_ = 0;
+    min_value_ = 0;
+    max_value_ = 0;
     prev_kernel_type_ = -1;
     tuned_ = false;
 
@@ -162,6 +164,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
         case OCL4DNN_CONV_FUSED_ACTIV_TANH:
             addDef("FUSED_CONV_TANH", 1);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            addDef("FUSED_CONV_RELU6", 1);
+            break;
         default:
             ;
     }
@@ -184,6 +189,10 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bo
         case OCL4DNN_CONV_FUSED_ACTIV_POWER:
             kernel.set(argIdx++, (float)power_);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            kernel.set(argIdx++, (float)min_value_);
+            kernel.set(argIdx++, (float)max_value_);
+            break;
         default:
             ;
     }
@@ -394,6 +403,19 @@ void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
 }
 
 template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+        min_value_ = min;
+        max_value_ = max;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
+template<typename Dtype>
 void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
 {
     if ( fuse_activ )
diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl
index df58a51..ab2532e 100644
--- a/modules/dnn/src/opencl/activations.cl
+++ b/modules/dnn/src/opencl/activations.cl
@@ -54,6 +54,17 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #endif
 }
+__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
+                           const T minValue, const T maxValue)
+{
+    int index = get_global_id(0);
+    if(index < count)
+    {
+        T x = in[index];
+        out[index] = clamp(x, minValue, maxValue);
+    }
+}
+
 __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                            __global const T* in, __global T* out,
                            __global const T* slope_data)
 {
diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl
index e31d173..8f6e5a3 100644
--- a/modules/dnn/src/opencl/conv_layer_spatial.cl
+++ b/modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -48,19 +48,22 @@
 
 #if defined(FUSED_CONV_RELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
-#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
+#define FUSED_ARG Dtype negative_slope,
 #elif defined(FUSED_CONV_PRELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
-#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#define FUSED_ARG __global const Dtype *negative_slope,
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
-#define NEGATIVE_SLOPE_ARG Dtype power,
+#define FUSED_ARG Dtype power,
 #elif defined(FUSED_CONV_TANH)
 #define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
+#elif defined(FUSED_CONV_RELU6)
+#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
+#define FUSED_ARG Dtype min_value, Dtype max_value,
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
 #endif
 
 #ifdef FUSED_CONV_ELTWISE
@@ -108,7 +111,7 @@
 
 __kernel void ConvolveBasic(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     int image_offset,
     __global Dtype* kernel_data,
@@ -197,7 +200,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
 __kernel void
 convolve_simd(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* inputs_base,
     filter_qualifier Dtype* weights_base,
     BIAS_KERNEL_ARG
@@ -417,7 +420,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
 
 #define GEMM_LIKE_KERNEL_ARGS     \
     ELTWISE_DATA_ARG              \
-    NEGATIVE_SLOPE_ARG            \
+    FUSED_ARG                     \
     const __global Dtype *src0,   \
     const __global Dtype *src1,   \
     BIAS_KERNEL_ARG               \
@@ -1731,7 +1734,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
 
 __kernel void DWCONV(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     __global Dtype* kernel_data,
     BIAS_KERNEL_ARG
-- 
2.7.4
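
Illustrative usage (not part of the patch): a minimal sketch of how the new ReLU6
OpenCL path could be exercised through the public dnn API. It assumes an
OpenCL-capable device (otherwise OpenCV falls back to the CPU path); the layer type
"ReLU6" and the "min_value"/"max_value" parameters come from the patch itself, while
the tiny single-layer network below is only an example.

    #include <opencv2/core.hpp>
    #include <opencv2/dnn.hpp>

    int main()
    {
        // Build a one-layer net containing only a ReLU6 activation.
        cv::dnn::LayerParams lp;
        lp.name = "relu6";
        lp.type = "ReLU6";
        lp.set("min_value", 0.0f);   // same defaults ReLU6Layer::create() uses
        lp.set("max_value", 6.0f);

        cv::dnn::Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);  // route forward() to the OCL path

        // Random 4D input blob; the ReLU6Forward kernel clamps every element to [0, 6].
        int sz[] = {1, 2, 3, 4};
        cv::Mat input(4, sz, CV_32F);
        cv::randu(input, -10, 10);

        net.setInput(input);
        cv::Mat out = net.forward();
        return 0;
    }

When a Convolution layer immediately precedes the ReLU6 layer, the fusion path added
in dnn.cpp and convolution_layer.cpp applies the same clamp inside the fused
convolution kernel instead of dispatching a separate activation kernel.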