reorg layer ocl implementation
author: Li Peng <peng.li@intel.com>
Wed, 29 Nov 2017 13:09:31 +0000 (21:09 +0800)
committer: Li Peng <peng.li@intel.com>
Wed, 6 Dec 2017 18:26:46 +0000 (02:26 +0800)
Signed-off-by: Li Peng <peng.li@intel.com>
modules/dnn/src/layers/reorg_layer.cpp
modules/dnn/src/opencl/reorg.cl [new file with mode: 0644]

index 78c806f..51da9fd 100644 (file)
@@ -44,6 +44,7 @@
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/dnn/all_layers.hpp>
 #include <iostream>
+#include "opencl_kernels_dnn.hpp"
 
 namespace cv
 {
@@ -86,11 +87,54 @@ public:
         return backendId == DNN_BACKEND_DEFAULT;
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            ocl::Kernel kernel("reorg", ocl::dnn::reorg_oclsrc, buildopt);
+            if (kernel.empty())
+                return false;
+
+            UMat& srcBlob = inputs[i];
+            UMat& dstBlob = outputs[0];
+            int channels = srcBlob.size[1];
+            int height = srcBlob.size[2];
+            int width = srcBlob.size[3];
+            size_t nthreads = channels * height * width;
+
+            kernel.set(0, (int)nthreads);
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(srcBlob));
+            kernel.set(2, (int)channels);
+            kernel.set(3, (int)height);
+            kernel.set(4, (int)width);
+            kernel.set(5, (int)reorgStride);
+            kernel.set(6, ocl::KernelArg::PtrWriteOnly(dstBlob));
+
+            if (!kernel.run(1, &nthreads, NULL, false))
+                return false;
+        }
+
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
     {   // Entry point: offers the work to forward_ocl via CV_OCL_RUN, then calls Layer::forward_fallback.
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))  // gated on the OpenCL target and the isIntel() performance check
+
         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // NOTE(review): presumably skipped when CV_OCL_RUN succeeds (macro returns early) -- confirm
     }
 
diff --git a/modules/dnn/src/opencl/reorg.cl b/modules/dnn/src/opencl/reorg.cl
new file mode 100644 (file)
index 0000000..a4b9cae
--- /dev/null
@@ -0,0 +1,63 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Gather kernel: for every index in [0, count) copies one element of `src` into
+// dst[index]; channels/height/width/reorgStride define the source index mapping.
+__kernel void reorg(const int count, __global const Dtype* src,
+                    const int channels, const int height, const int width,
+                    const int reorgStride, __global Dtype* dst)
+{
+    const int plane = height * width;
+    const int groupChannels = channels / (reorgStride * reorgStride);
+    const int srcRowLen = width * reorgStride;
+    const int srcRows = height * reorgStride;
+    // Each work-item starts at its global id and advances by the global size.
+    for (int index = get_global_id(0); index < count; index += get_global_size(0))
+    {
+        const int chan = index / plane;
+        const int inPlane = index - chan * plane;
+        const int row = inPlane / width, col = inPlane % width;
+        const int block = chan / groupChannels;  // selects the offset inside a reorgStride x reorgStride cell
+        const int srcCol = col * reorgStride + block % reorgStride;
+        const int srcRow = row * reorgStride + block / reorgStride;
+        dst[index] = src[srcCol + srcRowLen * (srcRow + srcRows * (chan % groupChannels))];
+    }
+}