From e3b42bf93b03edc92f4c610c24f0181930e53b18 Mon Sep 17 00:00:00 2001
From: Li Peng <peng.li@intel.com>
Date: Thu, 4 Jan 2018 23:14:28 +0800
Subject: [PATCH] batch_norm and blank layer ocl implementation

Signed-off-by: Li Peng <peng.li@intel.com>
---
 modules/dnn/src/layers/batch_norm_layer.cpp | 65 +++++++++++++++++++++++++++++
 modules/dnn/src/layers/blank_layer.cpp      | 16 ++++++-
 modules/dnn/test/test_tf_importer.cpp       |  7 ++++
 modules/dnn/test/test_torch_importer.cpp    | 10 +++++
 4 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp
index 8d9c639..5284e4d 100644
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@@ -22,6 +22,7 @@ class BatchNormLayerImpl : public BatchNormLayer
 {
 public:
     Mat weights_, bias_;
+    Mat weightMat, biasMat;
 
     BatchNormLayerImpl(const LayerParams& params)
     {
@@ -96,17 +97,81 @@ public:
         return true;
     }
 
+    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        if (inputs[0]->dims == 4)
+        {
+            int groups = inputs[0]->size[0];
+            int channels = inputs[0]->size[1];
+            int rows = inputs[0]->size[2];
+            int cols = inputs[0]->size[3];
+            MatShape s = shape(groups * channels, rows * cols);
+            weightMat = Mat(s[0], s[1], CV_32FC1);
+            biasMat = Mat(s[0], s[1], CV_32FC1);
+            for (int n = 0; n < s[0]; n++)
+            {
+                weightMat.row(n).setTo(weights_.at<float>(n % channels));
+                biasMat.row(n).setTo(bias_.at<float>(n % channels));
+            }
+        }
+    }
+
     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
               backendId == DNN_BACKEND_HALIDE && haveHalide();
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+
+        CV_Assert(blobs.size() >= 2);
+        CV_Assert(inputs.size() == 1);
+
+        UMat &inpBlob = inputs[0];
+        CV_Assert(inpBlob.dims == 2 || inpBlob.dims == 4);
+        int groups = inpBlob.size[0];
+        int channels = inpBlob.size[1];
+        int rows = inpBlob.dims > 2 ? inpBlob.size[2] : 1;
+        int cols = inpBlob.dims > 2 ? inpBlob.size[3] : 1;
+
+        for (size_t ii = 0; ii < outputs.size(); ii++)
+        {
+            if (inpBlob.dims == 2)
+            {
+                UMat& src = inputs[ii];
+                UMat& dst = outputs[ii];
+                multiply(src, weights_, dst);
+                add(dst, bias_, dst);
+            }
+            else
+            {
+                MatShape s = shape(groups * channels, rows * cols);
+                UMat src = inputs[ii].reshape(1, s.size(), &s[0]);
+                UMat dst = outputs[ii].reshape(1, s.size(), &s[0]);
+                multiply(src, weightMat, dst);
+                add(dst, biasMat, dst);
+            }
+        }
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
     }
 
diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp
index af2bfeb..c5568b0 100644
--- a/modules/dnn/src/layers/blank_layer.cpp
+++ b/modules/dnn/src/layers/blank_layer.cpp
@@ -63,8 +63,22 @@ public:
     }
 
 #ifdef HAVE_OPENCL
-    bool forward_ocl(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
     {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+
+        for (int i = 0, n = outputs.size(); i < n; ++i)
+        {
+            void *src_handle = inputs[i].handle(ACCESS_READ);
+            void *dst_handle = outputs[i].handle(ACCESS_WRITE);
+            if (src_handle != dst_handle)
+                inputs[i].copyTo(outputs[i]);
+        }
+
         return true;
     }
 #endif
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 04b4c9a..bde5760 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -152,6 +152,13 @@ TEST(Test_TensorFlow, batch_norm)
     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
 }
 
+OCL_TEST(Test_TensorFlow, batch_norm)
+{
+    runTensorFlowNet("batch_norm", DNN_TARGET_OPENCL);
+    runTensorFlowNet("fused_batch_norm", DNN_TARGET_OPENCL);
+    runTensorFlowNet("batch_norm_text", DNN_TARGET_OPENCL, true);
+}
+
 TEST(Test_TensorFlow, pooling)
 {
     runTensorFlowNet("max_pool_even");
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index 8ae0ca2..f7471dd 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -170,6 +170,11 @@ TEST(Torch_Importer, run_batch_norm)
     runTorchNet("net_batch_norm", DNN_TARGET_CPU, "", false, true);
 }
 
+OCL_TEST(Torch_Importer, run_batch_norm)
+{
+    runTorchNet("net_batch_norm", DNN_TARGET_OPENCL, "", false, true);
+}
+
 TEST(Torch_Importer, net_prelu)
 {
     runTorchNet("net_prelu");
@@ -242,6 +247,11 @@ TEST(Torch_Importer, net_non_spatial)
     runTorchNet("net_non_spatial", DNN_TARGET_CPU, "", false, true);
 }
 
+OCL_TEST(Torch_Importer, net_non_spatial)
+{
+    runTorchNet("net_non_spatial", DNN_TARGET_OPENCL, "", false, true);
+}
+
 TEST(Torch_Importer, ENet_accuracy)
 {
     Net net;
-- 
2.7.4