canny
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Wed, 24 Sep 2014 16:07:15 +0000 (16:07 +0000)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 26 Sep 2014 09:41:45 +0000 (09:41 +0000)
modules/imgproc/src/canny.cpp

index fa751c9..bfb37dc 100644 (file)
@@ -361,6 +361,13 @@ void cv::Canny( InputArray _src, OutputArray _dst,
                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
                     }
                 }
+#elif CV_NEON
+                for ( ; j <= width - 8; j += 8) // NEON: |dx| + |dy| for 8 pixels per iteration; `<=` also takes a final exact group of 8
+                {
+                    int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j); // widen to 32 bit BEFORE abs below so abs(-32768) does not wrap
+                    vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))), vabsq_s32(vmovl_s16(vget_low_s16(v_dy))))); // lanes 0..3
+                    vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))), vabsq_s32(vmovl_s16(vget_high_s16(v_dy))))); // lanes 4..7
+                }
 #endif
                 for ( ; j < width; ++j)
                     _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
@@ -386,6 +393,18 @@ void cv::Canny( InputArray _src, OutputArray _dst,
                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
                     }
                 }
+#elif CV_NEON
+                for ( ; j <= width - 8; j += 8) // NEON: dx*dx + dy*dy for 8 pixels per iteration; `<=` also takes a final exact group of 8
+                {
+                    int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
+                    int32x4_t v_dxp = vmovl_s16(vget_low_s16(v_dx)), v_dyp = vmovl_s16(vget_low_s16(v_dy)); // widen lanes 0..3 to 32 bit
+                    int32x4_t v_dst = vaddq_s32(vmulq_s32(v_dxp, v_dxp), vmulq_s32(v_dyp, v_dyp)); // same expression as the scalar tail loop
+                    vst1q_s32(_norm + j, v_dst);
+
+                    v_dxp = vmovl_s16(vget_high_s16(v_dx)), v_dyp = vmovl_s16(vget_high_s16(v_dy)); // widen lanes 4..7 to 32 bit
+                    v_dst = vaddq_s32(vmulq_s32(v_dxp, v_dxp), vmulq_s32(v_dyp, v_dyp));
+                    vst1q_s32(_norm + j + 4, v_dst); // upper four results belong at j+4..j+7, not on top of the lower four
+                }
 #endif
                 for ( ; j < width; ++j)
                     _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];