Add v_expand for AArch64, fuse vmovl+vget_high into vmovl_high

author Jonathan Deakin <jonathan.deakin@arm.com>

Tue, 23 Mar 2021 15:06:41 +0000 (15:06 +0000)

committer Jonathan Deakin <jonathan.deakin@arm.com>

Tue, 23 Mar 2021 15:06:41 +0000 (15:06 +0000)
author Jonathan Deakin <jonathan.deakin@arm.com>
Tue, 23 Mar 2021 15:06:41 +0000 (15:06 +0000)
committer Jonathan Deakin <jonathan.deakin@arm.com>
Tue, 23 Mar 2021 15:06:41 +0000 (15:06 +0000)
diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp

index 06e70b0c303c54aba3e015e7c97d4c5919522882..785648575a60477987b19d9720c7a9b387be8c0d 100644 (file)
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -1539,6 +1539,26 @@ OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32)
  OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64)
  #endif
  
+#if CV_NEON_AARCH64 /* AArch64 variant: uses vmovl_high_* in place of a separate vget_high_* + vmovl_* pair */
+#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) /* defines v_expand, v_expand_low, v_expand_high and v_load_expand for one (_Tpvec -> _Tpwvec) pair; suffix selects the intrinsic family */ \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) /* b0 receives vmovl of the low half of a, b1 receives vmovl_high of a */ \
+{ \
+    b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
+    b1.val = vmovl_high_##suffix(a.val); /* takes the whole register, so no vget_high_* call is needed */ \
+} \
+inline _Tpwvec v_expand_low(const _Tpvec& a) /* same computation as b0 in v_expand */ \
+{ \
+    return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \
+} \
+inline _Tpwvec v_expand_high(const _Tpvec& a) /* same computation as b1 in v_expand */ \
+{ \
+    return _Tpwvec(vmovl_high_##suffix(a.val)); \
+} \
+inline _Tpwvec v_load_expand(const _Tp* ptr) /* loads from ptr with vld1_* and passes the result through vmovl_* */ \
+{ \
+    return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
+}
+#else
  #define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
  inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
  { \
@@ -1557,6 +1577,7 @@ inline _Tpwvec v_load_expand(const _Tp* ptr) \
  { \
      return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
  }
+#endif
  
  OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
  OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8)
author	Jonathan Deakin <jonathan.deakin@arm.com>
	Tue, 23 Mar 2021 15:06:41 +0000 (15:06 +0000)
committer	Jonathan Deakin <jonathan.deakin@arm.com>
	Tue, 23 Mar 2021 15:06:41 +0000 (15:06 +0000)