From fa15769f39154ffdfbd5d5d8b82d376b37c9f5c2 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 4 Nov 2013 20:50:33 +0400
Subject: [PATCH] added CV_16SC2 && CV_16UC1 maps support to ocl::remap
 (nearest neighbour only)

---
 modules/ocl/src/imgproc.cpp                 | 20 +++++++++-----
 modules/ocl/src/opencl/imgproc_remap.cl     | 41 +++++++++++++++++++++++++++--
 modules/ocl/src/opencl/imgproc_threshold.cl |  4 +--
 modules/ocl/test/test_warp.cpp              |  1 +
 4 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index 3539dfa..193cb43 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -195,9 +195,14 @@ namespace cv
                 return;
             }
 
+            if (map1.empty())
+                map1.swap(map2);
+
             CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
-                      || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
-            CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) ||
+                      /*|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
+            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (interpolation == INTER_NEAREST &&
+                                                                    (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) )) ||
+                      (map1.type() == CV_32FC2 && !map2.data) ||
                       (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
             CV_Assert(!map2.data || map2.size() == map1.size());
             CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP
@@ -212,10 +217,14 @@ namespace cv
                                    "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
 
             string kernelName = "remap";
-            if ( map1.type() == CV_32FC2 && !map2.data )
+            if (map1.type() == CV_32FC2 && map2.empty())
                 kernelName += "_32FC2";
-            else if (map1.type() == CV_16SC2 && !map2.data)
+            else if (map1.type() == CV_16SC2)
+            {
                 kernelName += "_16SC2";
+                if (!map2.empty())
+                    kernelName += "_16UC1";
+            }
             else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
                 kernelName += "_2_32FC1";
             else
@@ -232,9 +241,6 @@ namespace cv
             if (interpolation != INTER_NEAREST)
             {
                 int wdepth = std::max(CV_32F, dst.depth());
-                if (!supportsDouble)
-                    wdepth = std::min(CV_32F, wdepth);
-
                 buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
                                        " -D convertToWT2=convert_%s2 -D WT2=%s2",
                                        typeMap[wdepth], channelMap[ocn],
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl
index 53c0539..b623091 100644
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -51,6 +51,13 @@
 #endif
 #endif
 
+enum // bit-layout / quantization constants used by the remap kernels in this file
+{
+    INTER_BITS = 5, // width in bits of one per-axis field inside a packed map2 entry (x field in the low bits, y field above it)
+    INTER_TAB_SIZE = 1 << INTER_BITS, // 32 steps per axis; also the factor the INTER_LINEAR kernels use to quantize the fractional offset
+    INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE // 1024 combined (x, y) codes; (INTER_TAB_SIZE2 - 1) is the mask applied to a map2 entry
+};
+
 #ifdef INTER_NEAREST
 #define convertToWT
 #endif
@@ -204,6 +211,36 @@ __kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __g
     }
 }
 
+__kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * dst, __global short2 * map1, __global ushort * map2, // copies one source pixel per destination pixel: map1 holds integer (x, y) coordinates, map2 a packed code that bumps each coordinate by 0 or 1
+        int src_offset, int dst_offset, int map1_offset, int map2_offset, // added directly to element indices below, so presumably expressed in elements rather than bytes - confirm against the host code
+        int src_step, int dst_step, int map1_step, int map2_step, // row strides, likewise used as element counts in mad24(row, step, col + offset)
+        int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar) // src_cols/src_rows/scalar are not referenced by name in this body; presumably consumed by the NEED_EXTRAPOLATION / EXTRAPOLATE macros - confirm
+{
+    int x = get_global_id(0); // destination column handled by this work-item
+    int y = get_global_id(1); // destination row handled by this work-item
+
+    if (x < dst_cols && y < dst_rows) // work-items outside the destination image do nothing
+    {
+        int dstIdx = mad24(y, dst_step, x + dst_offset); // y * dst_step + x + dst_offset
+        int map1Idx = mad24(y, map1_step, x + map1_offset); // same addressing scheme for the coordinate map
+        int map2Idx = mad24(y, map2_step, x + map2_offset); // and for the packed-code map
+
+        int map2Value = convert_int(map2[map2Idx]) & (INTER_TAB_SIZE2 - 1); // keep only the low 2 * INTER_BITS bits of the entry
+        int dx = (map2Value & (INTER_TAB_SIZE - 1)) < (INTER_TAB_SIZE >> 1) ? 1 : 0; // low INTER_BITS bits: x gets +1 when the field is BELOW half of INTER_TAB_SIZE. NOTE(review): looks like it mirrors the CPU nearest-neighbour delta table - confirm
+        int dy = (map2Value >> INTER_BITS) < (INTER_TAB_SIZE >> 1) ? 1 : 0; // upper INTER_BITS bits: same rule applied to y
+        int2 gxy = convert_int2(map1[map1Idx]) + (int2)(dx, dy); // integer source coordinate after the 0/1 adjustment
+        int gx = gxy.x, gy = gxy.y; // scalar copies, passed to NEED_EXTRAPOLATION and used for the source index
+
+        if (NEED_EXTRAPOLATION(gx, gy)) // macro defined outside this hunk; presumably true when (gx, gy) falls outside src - confirm
+            EXTRAPOLATE(gxy, dst[dstIdx]) // no ';' here, so the macro must expand to a complete statement; presumably writes dst according to the border mode - confirm
+        else
+        {
+            int srcIdx = mad24(gy, src_step, gx + src_offset); // gy * src_step + gx + src_offset
+            dst[dstIdx] = src[srcIdx]; // plain copy of the selected source pixel, no interpolation
+        }
+    }
+}
+
 #elif INTER_LINEAR
 
 __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
@@ -229,7 +266,7 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
 
         float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
+        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
         WT scalar = convertToWT(nVal);
         WT a = scalar, b = scalar, c = scalar, d = scalar;
 
@@ -282,7 +319,7 @@ __kernel void remap_32FC2(__global T const * restrict  src, __global T * dst,
         int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
 
         float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
+        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
         WT scalar = convertToWT(nVal);
         WT a = scalar, b = scalar, c = scalar, d = scalar;
 
diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl
index 400ac80..6f97c04 100644
--- a/modules/ocl/src/opencl/imgproc_threshold.cl
+++ b/modules/ocl/src/opencl/imgproc_threshold.cl
@@ -93,8 +93,8 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
 #endif
         else
         {
-            T array[VECSIZE];
-            VSTOREN(vecValue, 0, array);
+            __attribute__(( aligned(sizeof(VT)) )) T array[VECSIZE];
+            *((VT*)array) = vecValue;
             #pragma unroll
             for (int i = 0; i < VECSIZE; ++i)
                 if (gx + i < max_index)
diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index 05554ce..b9231d1 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -355,6 +355,7 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
                             Values(1, 2, 3, 4),
                             Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
                                    pair<MatType, MatType>((MatType)CV_32FC2, noType),
+                                   pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
                                    pair<MatType, MatType>((MatType)CV_16SC2, noType)),
                             Values((Border)BORDER_CONSTANT,
                                    (Border)BORDER_REPLICATE,
-- 
2.7.4