cv::resize 16uc1
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Thu, 25 Sep 2014 13:55:52 +0000 (13:55 +0000)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 26 Sep 2014 09:41:46 +0000 (09:41 +0000)
modules/imgproc/src/imgwarp.cpp
modules/imgproc/test/test_imgwarp.cpp

index 01fc592..07835e6 100644 (file)
@@ -49,6 +49,8 @@
 #include "precomp.hpp"
 #include "opencl_kernels_imgproc.hpp"
 
+#include <iostream>
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 static IppStatus sts = ippInit();
 #endif
@@ -1384,7 +1386,55 @@ private:
     int cn, step;
 };
 
-typedef ResizeAreaFastNoVec<ushort, ushort> ResizeAreaFastVec_SIMD_16u;
+class ResizeAreaFastVec_SIMD_16u
+{
+public:
+    ResizeAreaFastVec_SIMD_16u(int _cn, int _step) :
+        cn(_cn), step(_step)
+    {
+    }
+
+    int operator() (const ushort * S, ushort * D, int w) const
+    {
+        int dx = 0;
+        const ushort * S0 = S, * S1 = (const ushort *)((const uchar *)(S0) + step);
+
+        uint32x4_t v_2 = vdupq_n_u32(2);
+
+        if (cn == 1)
+        {
+            for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
+            {
+                uint16x8x2_t v_row0 = vld2q_u16(S0), v_row1 = vld2q_u16(S1);
+
+                uint32x4_t v_dst0 = vaddl_u16(vget_low_u16(v_row0.val[0]), vget_low_u16(v_row0.val[1]));
+                v_dst0 = vaddq_u32(v_dst0, vaddl_u16(vget_low_u16(v_row1.val[0]), vget_low_u16(v_row1.val[1])));
+                v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_2), 2);
+
+                uint32x4_t v_dst1 = vaddl_u16(vget_high_u16(v_row0.val[0]), vget_high_u16(v_row0.val[1]));
+                v_dst1 = vaddq_u32(v_dst1, vaddl_u16(vget_high_u16(v_row1.val[0]), vget_high_u16(v_row1.val[1])));
+                v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_2), 2);
+
+                vst1q_u16(D, vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1)));
+            }
+        }
+        else if (cn == 4)
+        {
+            for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
+            {
+                uint16x8_t v_row0 = vld1q_u16(S0), v_row1 = vld1q_u16(S1);
+                uint32x4_t v_dst = vaddq_u32(vaddl_u16(vget_low_u16(v_row0), vget_high_u16(v_row0)),
+                                             vaddl_u16(vget_low_u16(v_row1), vget_high_u16(v_row1)));
+                vst1_u16(D, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst, v_2), 2)));
+            }
+        }
+
+        return dx;
+    }
+
+private:
+    int cn, step;
+};
 
 #elif CV_SSE2
 
index 91470cf..eb13d35 100644 (file)
@@ -1575,15 +1575,26 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst)
 
 TEST(Resize, Area_half)
 {
-    int types[] = { CV_8UC1, CV_8UC4 };
+    const int size = 10;
+    int types[] = { CV_8UC1, CV_8UC4, CV_16UC1, CV_16UC4 };
 
-    for (int i = 0, size = sizeof(types) / sizeof(types[0]); i < size; ++i)
+    cv::RNG rng(17);
+
+    for (int i = 0, _size = sizeof(types) / sizeof(types[0]); i < _size; ++i)
     {
-        int type = types[i];
-        cv::Mat src(100, 100, type), dst_actual(50, 50, type), dst_reference(50, 50, type);
+        int type = types[i], depth = CV_MAT_DEPTH(type);
+
+        SCOPED_TRACE(depth);
+
+        cv::Mat src(size, size, type), dst_actual(size >> 1, size >> 1, type),
+            dst_reference(size >> 1, size >> 1, type);
+
+        rng.fill(src, cv::RNG::UNIFORM, 0, 1000, true);
 
-        if (CV_MAT_DEPTH(type) == CV_8U)
+        if (depth == CV_8U)
             resizeArea<uchar, ushort>(src, dst_reference);
+        else if (depth == CV_16U)
+            resizeArea<ushort, int>(src, dst_reference);
         else
             CV_Assert(0);