cv::threshold
author: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 26 Sep 2014 10:55:40 +0000 (10:55 +0000)
committer: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 26 Sep 2014 13:16:48 +0000 (13:16 +0000)
modules/imgproc/src/thresh.cpp

index 09a1f6e..721a596 100644 (file)
@@ -264,6 +264,74 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
             }
         }
     }
+#elif CV_NEON
+    uint8x16_t v_thresh = vdupq_n_u8(thresh), v_maxval = vdupq_n_u8(maxval);
+
+    switch( type )
+    {
+    case THRESH_BINARY:
+        for( i = 0; i < roi.height; i++ )
+        {
+            const uchar* src = _src.ptr() + src_step*i;
+            uchar* dst = _dst.ptr() + dst_step*i;
+
+            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
+                vst1q_u8(dst + j_scalar, vandq_u8(vcgtq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
+        }
+        break;
+
+    case THRESH_BINARY_INV:
+        for( i = 0; i < roi.height; i++ )
+        {
+            const uchar* src = _src.ptr() + src_step*i;
+            uchar* dst = _dst.ptr() + dst_step*i;
+
+            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
+                vst1q_u8(dst + j_scalar, vandq_u8(vcleq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
+        }
+        break;
+
+    case THRESH_TRUNC:
+        for( i = 0; i < roi.height; i++ )
+        {
+            const uchar* src = _src.ptr() + src_step*i;
+            uchar* dst = _dst.ptr() + dst_step*i;
+
+            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
+                vst1q_u8(dst + j_scalar, vminq_u8(vld1q_u8(src + j_scalar), v_thresh));
+        }
+        break;
+
+    case THRESH_TOZERO:
+        for( i = 0; i < roi.height; i++ )
+        {
+            const uchar* src = _src.ptr() + src_step*i;
+            uchar* dst = _dst.ptr() + dst_step*i;
+
+            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
+            {
+                uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcgtq_u8(v_src, v_thresh);
+                vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
+            }
+        }
+        break;
+
+    case THRESH_TOZERO_INV:
+        for( i = 0; i < roi.height; i++ )
+        {
+            const uchar* src = _src.ptr() + src_step*i;
+            uchar* dst = _dst.ptr() + dst_step*i;
+
+            for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
+            {
+                uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcleq_u8(v_src, v_thresh);
+                vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
+            }
+        }
+        break;
+    default:
+        return CV_Error( CV_StsBadArg, "" );
+    }
 #endif
 
     if( j_scalar < roi.width )
@@ -382,6 +450,14 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
                     _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
+        #elif CV_NEON
+            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);
+
+            for( ; j <= roi.width - 8; j += 8 )
+            {
+                uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh);
+                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
+            }
         #endif
 
             for( ; j < roi.width; j++ )
@@ -410,6 +486,14 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
                     _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
+        #elif CV_NEON
+            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);
+
+            for( ; j <= roi.width - 8; j += 8 )
+            {
+                uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh);
+                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
+            }
         #endif
 
             for( ; j < roi.width; j++ )
@@ -436,6 +520,11 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
                     _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
+        #elif CV_NEON
+            int16x8_t v_thresh = vdupq_n_s16(thresh);
+
+            for( ; j <= roi.width - 8; j += 8 )
+                vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh));
         #endif
 
             for( ; j < roi.width; j++ )
@@ -462,6 +551,15 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
                     _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
+        #elif CV_NEON
+            int16x8_t v_thresh = vdupq_n_s16(thresh);
+
+            for( ; j <= roi.width - 8; j += 8 )
+            {
+                int16x8_t v_src = vld1q_s16(src + j);
+                uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh);
+                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
+            }
         #endif
 
             for( ; j < roi.width; j++ )
@@ -491,6 +589,15 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
                     _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
+        #elif CV_NEON
+            int16x8_t v_thresh = vdupq_n_s16(thresh);
+
+            for( ; j <= roi.width - 8; j += 8 )
+            {
+                int16x8_t v_src = vld1q_s16(src + j);
+                uint16x8_t v_mask = vcleq_s16(v_src, v_thresh);
+                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
+            }
         #endif
             for( ; j < roi.width; j++ )
             {
@@ -576,6 +683,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
                         _mm_storeu_ps( dst + j + 4, v1 );
                     }
                 }
+#elif CV_NEON
+                float32x4_t v_thresh = vdupq_n_f32(thresh);
+                uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));
+
+                for( ; j <= roi.width - 4; j += 4 )
+                {
+                    float32x4_t v_src = vld1q_f32(src + j);
+                    uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), v_maxval);
+                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
+                }
 #endif
 
                 for( ; j < roi.width; j++ )
@@ -604,6 +721,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
                         _mm_storeu_ps( dst + j + 4, v1 );
                     }
                 }
+#elif CV_NEON
+                float32x4_t v_thresh = vdupq_n_f32(thresh);
+                uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));
+
+                for( ; j <= roi.width - 4; j += 4 )
+                {
+                    float32x4_t v_src = vld1q_f32(src + j);
+                    uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), v_maxval);
+                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
+                }
 #endif
 
                 for( ; j < roi.width; j++ )
@@ -630,6 +757,11 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
                         _mm_storeu_ps( dst + j + 4, v1 );
                     }
                 }
+#elif CV_NEON
+                float32x4_t v_thresh = vdupq_n_f32(thresh);
+
+                for( ; j <= roi.width - 4; j += 4 )
+                    vst1q_f32(dst + j, vminq_f32(vld1q_f32(src + j), v_thresh));
 #endif
 
                 for( ; j < roi.width; j++ )
@@ -656,6 +788,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
                         _mm_storeu_ps( dst + j + 4, v1 );
                     }
                 }
+#elif CV_NEON
+                float32x4_t v_thresh = vdupq_n_f32(thresh);
+
+                for( ; j <= roi.width - 4; j += 4 )
+                {
+                    float32x4_t v_src = vld1q_f32(src + j);
+                    uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh),
+                                                 vreinterpretq_u32_f32(v_src));
+                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
+                }
 #endif
 
                 for( ; j < roi.width; j++ )
@@ -685,6 +827,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
                         _mm_storeu_ps( dst + j + 4, v1 );
                     }
                 }
+#elif CV_NEON
+                float32x4_t v_thresh = vdupq_n_f32(thresh);
+
+                for( ; j <= roi.width - 4; j += 4 )
+                {
+                    float32x4_t v_src = vld1q_f32(src + j);
+                    uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh),
+                                                 vreinterpretq_u32_f32(v_src));
+                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
+                }
 #endif
                 for( ; j < roi.width; j++ )
                 {