Updated findContours to use wide universal intrinsics

author Vitaly Tuzov <terfendail@mediana.jetos.com>

Mon, 19 Nov 2018 15:53:12 +0000 (18:53 +0300)

committer Vitaly Tuzov <terfendail@mediana.jetos.com>

Wed, 21 Nov 2018 16:57:02 +0000 (19:57 +0300)
author Vitaly Tuzov <terfendail@mediana.jetos.com>
Mon, 19 Nov 2018 15:53:12 +0000 (18:53 +0300)
committer Vitaly Tuzov <terfendail@mediana.jetos.com>
Wed, 21 Nov 2018 16:57:02 +0000 (19:57 +0300)
diff --git a/modules/imgproc/src/contours.cpp b/modules/imgproc/src/contours.cpp

index f4d0be5..b952296 100644 (file)
--- a/modules/imgproc/src/contours.cpp
+++ b/modules/imgproc/src/contours.cpp
@@ -41,6 +41,8 @@
  #include "precomp.hpp"
  #include "opencv2/core/hal/intrin.hpp"
  
+using namespace cv;
+
  /* initializes 8-element array for fast access to 3x3 neighborhood of a pixel */
  #define  CV_INIT_3X3_DELTAS( deltas, step, nch )            \
      ((deltas)[0] =  (nch),  (deltas)[1] = -(step) + (nch),  \
@@ -1006,10 +1008,6 @@ cvFindNextContour( CvContourScanner scanner )
      if( !scanner )
          CV_Error( CV_StsNullPtr, "" );
  
-#if CV_SSE2
-    bool haveSIMD = cv::checkHardwareSupport(CPU_SSE2);
-#endif
-
      CV_Assert(scanner->img_step >= 0);
  
      icvEndProcessContour( scanner );
@@ -1056,48 +1054,22 @@ cvFindNextContour( CvContourScanner scanner )
              }
              else
              {
-#if CV_SSE2
-                if ((p = img[x]) != prev) {
+#if CV_SIMD
+                if ((p = img[x]) != prev)
+                {
                      goto _next_contour;
-                } else if (haveSIMD) {
-
-                    __m128i v_prev = _mm_set1_epi8((char)prev);
-                    int v_size = width - 32;
-
-                    for (; x <= v_size; x += 32) {
-                        __m128i v_p1 = _mm_loadu_si128((const __m128i*)(img + x));
-                        __m128i v_p2 = _mm_loadu_si128((const __m128i*)(img + x + 16));
-
-                        __m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_prev);
-                        __m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_prev);
-
-                        unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
-                        unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
-
-                        mask1 ^= 0x0000ffff;
-                        mask2 ^= 0x0000ffff;
-
-                        if (mask1) {
-                            p = img[(x += cv::trailingZeros32(mask1))];
-                            goto _next_contour;
-                        }
-
-                        if (mask2) {
-                            p = img[(x += cv::trailingZeros32(mask2 << 16))];
-                            goto _next_contour;
-                        }
-                    }
-
-                    if(x <= width - 16) {
-                        __m128i v_p = _mm_loadu_si128((__m128i*)(img + x));
-
-                        unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_prev)) ^ 0x0000ffff;
-
-                        if (mask) {
+                }
+                else
+                {
+                    v_uint8 v_prev = vx_setall_u8((uchar)prev);
+                    for (; x <= width - v_uint8::nlanes; x += v_uint8::nlanes)
+                    {
+                        unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(img + x)) != v_prev);
+                        if (mask)
+                        {
                              p = img[(x += cv::trailingZeros32(mask))];
                              goto _next_contour;
                          }
-                        x += 16;
                      }
                  }
  #endif
@@ -1107,7 +1079,7 @@ cvFindNextContour( CvContourScanner scanner )
  
              if( x >= width )
                  break;
-#if CV_SSE2
+#if CV_SIMD
          _next_contour:
  #endif
              {
@@ -1353,99 +1325,45 @@ typedef  struct CvLinkedRunPoint
  }
  CvLinkedRunPoint;
  
-inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSIMD) {
-#if CV_SSE2
-    if (haveSIMD) {
-        __m128i v_zero = _mm_setzero_si128();
-        int v_size = img_size.width - 32;
-
-        for (; j <= v_size; j += 32) {
-            __m128i v_p1 = _mm_loadu_si128((const __m128i*)(src_data + j));
-            __m128i v_p2 = _mm_loadu_si128((const __m128i*)(src_data + j + 16));
-
-            __m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_zero);
-            __m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_zero);
-
-            unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
-            unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
-
-            mask1 ^= 0x0000ffff;
-            mask2 ^= 0x0000ffff;
-
-            if (mask1) {
-                j += cv::trailingZeros32(mask1);
-                return j;
-            }
-
-            if (mask2) {
-                j += cv::trailingZeros32(mask2 << 16);
-                return j;
-            }
-        }
-
-        if (j <= img_size.width - 16) {
-            __m128i v_p = _mm_loadu_si128((const __m128i*)(src_data + j));
-
-            unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero)) ^ 0x0000ffff;
-
-            if (mask) {
-                j += cv::trailingZeros32(mask);
-                return j;
-            }
-            j += 16;
+inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j)
+{
+#if CV_SIMD
+    v_uint8 v_zero = vx_setzero_u8();
+    for (; j <= img_size.width - v_uint8::nlanes; j += v_uint8::nlanes)
+    {
+        unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(src_data + j)) != v_zero);
+        if (mask)
+        {
+            j += cv::trailingZeros32(mask);
+            return j;
          }
      }
-#else
-    CV_UNUSED(haveSIMD);
  #endif
      for (; j < img_size.width && !src_data[j]; ++j)
          ;
      return j;
  }
  
-inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSIMD) {
-#if CV_SSE2
-    if (j < img_size.width && !src_data[j]) {
+inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j)
+{
+#if CV_SIMD
+    if (j < img_size.width && !src_data[j])
+    {
          return j;
-    } else if (haveSIMD) {
-        __m128i v_zero = _mm_setzero_si128();
-        int v_size = img_size.width - 32;
-
-        for (; j <= v_size; j += 32) {
-            __m128i v_p1 = _mm_loadu_si128((const __m128i*)(src_data + j));
-            __m128i v_p2 = _mm_loadu_si128((const __m128i*)(src_data + j + 16));
-
-            __m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_zero);
-            __m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_zero);
-
-            unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
-            unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
-
-            if (mask1) {
-                j += cv::trailingZeros32(mask1);
-                return j;
-            }
-
-            if (mask2) {
-                j += cv::trailingZeros32(mask2 << 16);
-                return j;
-            }
-        }
-
-        if (j <= img_size.width - 16) {
-            __m128i v_p = _mm_loadu_si128((const __m128i*)(src_data + j));
-
-            unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero));
-
-            if (mask) {
+    }
+    else
+    {
+        v_uint8 v_zero = vx_setzero_u8();
+        for (; j <= img_size.width - v_uint8::nlanes; j += v_uint8::nlanes)
+        {
+            unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(src_data + j)) == v_zero);
+            if (mask)
+            {
                  j += cv::trailingZeros32(mask);
                  return j;
              }
-            j += 16;
          }
      }
-#else
-    CV_UNUSED(haveSIMD);
  #endif
      for (; j < img_size.width && src_data[j]; ++j)
          ;
@@ -1475,7 +1393,6 @@ icvFindContoursInInterval( const CvArr* src,
      int  lower_total;
      int  upper_total;
      int  all_total;
-    bool haveSIMD = false;
  
      CvSeq*  runs;
      CvLinkedRunPoint  tmp;
@@ -1505,9 +1422,7 @@ icvFindContoursInInterval( const CvArr* src,
  
      if( contourHeaderSize < (int)sizeof(CvContour))
          CV_Error( CV_StsBadSize, "Contour header size must be >= sizeof(CvContour)" );
-#if CV_SSE2
-    haveSIMD = cv::checkHardwareSupport(CPU_SSE2);
-#endif
+
      storage00.reset(cvCreateChildMemStorage(storage));
      storage01.reset(cvCreateChildMemStorage(storage));
  
@@ -1539,7 +1454,7 @@ icvFindContoursInInterval( const CvArr* src,
      tmp_prev = upper_line;
      for( j = 0; j < img_size.width; )
      {
-        j = findStartContourPoint(src_data, cvSize(img_size), j, haveSIMD);
+        j = findStartContourPoint(src_data, cvSize(img_size), j);
  
          if( j == img_size.width )
              break;
@@ -1549,7 +1464,7 @@ icvFindContoursInInterval( const CvArr* src,
          tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
          tmp_prev = tmp_prev->next;
  
-        j = findEndContourPoint(src_data, cvSize(img_size), j + 1, haveSIMD);
+        j = findEndContourPoint(src_data, cvSize(img_size), j + 1);
  
          tmp.pt.x = j - 1;
          CV_WRITE_SEQ_ELEM( tmp, writer );
@@ -1573,7 +1488,7 @@ icvFindContoursInInterval( const CvArr* src,
          all_total = runs->total;
          for( j = 0; j < img_size.width; )
          {
-            j = findStartContourPoint(src_data, cvSize(img_size), j, haveSIMD);
+            j = findStartContourPoint(src_data, cvSize(img_size), j);
  
              if( j == img_size.width ) break;
  
@@ -1582,7 +1497,7 @@ icvFindContoursInInterval( const CvArr* src,
              tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
              tmp_prev = tmp_prev->next;
  
-            j = findEndContourPoint(src_data, cvSize(img_size), j + 1, haveSIMD);
+            j = findEndContourPoint(src_data, cvSize(img_size), j + 1);
  
              tmp.pt.x = j - 1;
              CV_WRITE_SEQ_ELEM( tmp, writer );
author	Vitaly Tuzov <terfendail@mediana.jetos.com>
	Mon, 19 Nov 2018 15:53:12 +0000 (18:53 +0300)
committer	Vitaly Tuzov <terfendail@mediana.jetos.com>
	Wed, 21 Nov 2018 16:57:02 +0000 (19:57 +0300)