From 7bc017601f64a3b683182b3b7a176461eed5e069 Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Thu, 23 Nov 2017 22:39:10 +0800
Subject: [PATCH] Power, Tanh and Channels ReLU layer ocl support

Signed-off-by: Li Peng
---
 modules/dnn/src/layers/elementwise_layers.cpp | 83 ++++++++++++++++++++++++---
 modules/dnn/src/opencl/activations.cl         |  9 +++
 modules/dnn/test/test_layers.cpp              |  6 ++
 3 files changed, 90 insertions(+), 8 deletions(-)

diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index d427e60..cc66fb0 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -267,7 +267,6 @@ struct ReLUFunctor
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
         std::vector<UMat> inputs;
         std::vector<UMat> outputs;
 
         inps.getUMatVector(inputs);
@@ -287,7 +286,7 @@
             kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
 
             size_t gSize = src.total();
-            CV_Assert(kernel.run(1, &gSize, &wgSize, false));
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
         }
 
         return true;
@@ -395,8 +394,28 @@ struct TanHFunctor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif
 
@@ -594,8 +613,31 @@ struct PowerFunctor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("PowForward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(3, (float)power);
+            kernel.set(4, (float)scale);
+            kernel.set(5, (float)shift);
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif
 
@@ -624,9 +666,11 @@ struct ChannelsPReLUFunctor
 {
     typedef ChannelsPReLULayer Layer;
     Mat scale;
+    UMat scale_umat;
 
     explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
     {
+        scale_umat = scale.getUMat(ACCESS_READ);
     }
 
     void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
@@ -669,8 +713,31 @@ struct ChannelsPReLUFunctor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, (int)src.size[1]);
+            kernel.set(2, (int)total(shape(src), 2));
+            kernel.set(3, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(4, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(5, ocl::KernelArg::PtrReadOnly(scale_umat));
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif
 
diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl
index 0649f2e..df58a51 100644
--- a/modules/dnn/src/opencl/activations.cl
+++ b/modules/dnn/src/opencl/activations.cl
@@ -54,6 +54,15 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #endif
 }
 
+__kernel void PReLUForward(const int count, const int channels, const int plane_size,
+                           __global const T* in, __global T* out, __global const T* slope_data)
+{
+    int index = get_global_id(0);
+    int c = (index / plane_size) % channels;
+    if(index < count)
+        out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
+}
+
 __kernel void TanHForward(const int count, __global T* in, __global T* out) {
     int index = get_global_id(0);
     if(index < count)
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 5bf77ee..94c8774 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -331,6 +331,12 @@ TEST(Layer_Test_PReLU, Accuracy)
     testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_CPU, true, false);
 }
 
+OCL_TEST(Layer_Test_PReLU, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_OPENCL, true);
+    testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_OPENCL, true, false);
+}
+
 //template<typename XMat>
 //static void test_Layer_Concat()
 //{
-- 
2.7.4
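
Note for anyone trying the patch: beyond the OCL_TEST above, the new kernels are picked up transparently whenever a network runs with the OpenCL target. Below is a minimal sketch (not part of the patch) of how the TanH/Power/PReLU OCL paths can be exercised from user code. The model and image file names are placeholders, and it assumes an OpenCL-capable device plus a Caffe model containing one of these activation layers.

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    int main()
    {
        // Placeholder files; any Caffe net with a TanH, Power or PReLU layer will do.
        cv::dnn::Net net = cv::dnn::readNetFromCaffe("model.prototxt", "model.caffemodel");

        // Request the OpenCL target so each layer's applyOCL() is tried first.
        // A layer whose applyOCL() returns false falls back to the CPU path.
        net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);

        cv::Mat img = cv::imread("input.jpg");
        cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224));
        net.setInput(blob);
        cv::Mat out = net.forward();
        return 0;
    }

Passing NULL as the work-group size to kernel.run(), as the patch now does everywhere, lets the OpenCL runtime pick a local size instead of pinning it to the device maximum, which is why the wgSize query in ReLUFunctor could be dropped.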