From 1dfcc3b541c52174e0d7d7f30e7e092d02000a7f Mon Sep 17 00:00:00 2001 From: Srinath Parvathaneni Date: Fri, 20 Mar 2020 16:56:23 +0000 Subject: [PATCH] [ARM][GCC][11x]: MVE ACLE vector interleaving store and deinterleaving load intrinsics and also aliases to vstr and vldr intrinsics. This patch supports following MVE ACLE intrinsics which are aliases of vstr and vldr intrinsics. vst1q_p_u8, vst1q_p_s8, vld1q_z_u8, vld1q_z_s8, vst1q_p_u16, vst1q_p_s16, vld1q_z_u16, vld1q_z_s16, vst1q_p_u32, vst1q_p_s32, vld1q_z_u32, vld1q_z_s32, vld1q_z_f16, vst1q_p_f16, vld1q_z_f32, vst1q_p_f32. This patch also supports following MVE ACLE vector deinterleaving loads and vector interleaving stores. vst2q_s8, vst2q_u8, vld2q_s8, vld2q_u8, vld4q_s8, vld4q_u8, vst2q_s16, vst2q_u16, vld2q_s16, vld2q_u16, vld4q_s16, vld4q_u16, vst2q_s32, vst2q_u32, vld2q_s32, vld2q_u32, vld4q_s32, vld4q_u32, vld4q_f16, vld2q_f16, vst2q_f16, vld4q_f32, vld2q_f32, vst2q_f32. Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details. [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics 2020-03-20 Srinath Parvathaneni Andre Vieira Mihail Ionescu * config/arm/arm_mve.h (vst1q_p_u8): Define macro. (vst1q_p_s8): Likewise. (vst2q_s8): Likewise. (vst2q_u8): Likewise. (vld1q_z_u8): Likewise. (vld1q_z_s8): Likewise. (vld2q_s8): Likewise. (vld2q_u8): Likewise. (vld4q_s8): Likewise. (vld4q_u8): Likewise. (vst1q_p_u16): Likewise. (vst1q_p_s16): Likewise. (vst2q_s16): Likewise. (vst2q_u16): Likewise. (vld1q_z_u16): Likewise. (vld1q_z_s16): Likewise. (vld2q_s16): Likewise. (vld2q_u16): Likewise. (vld4q_s16): Likewise. (vld4q_u16): Likewise. (vst1q_p_u32): Likewise. (vst1q_p_s32): Likewise. (vst2q_s32): Likewise. (vst2q_u32): Likewise. (vld1q_z_u32): Likewise. (vld1q_z_s32): Likewise. (vld2q_s32): Likewise. (vld2q_u32): Likewise. (vld4q_s32): Likewise. (vld4q_u32): Likewise. (vld4q_f16): Likewise. (vld2q_f16): Likewise. (vld1q_z_f16): Likewise. (vst2q_f16): Likewise. (vst1q_p_f16): Likewise. (vld4q_f32): Likewise. (vld2q_f32): Likewise. (vld1q_z_f32): Likewise. (vst2q_f32): Likewise. (vst1q_p_f32): Likewise. (__arm_vst1q_p_u8): Define intrinsic. (__arm_vst1q_p_s8): Likewise. (__arm_vst2q_s8): Likewise. (__arm_vst2q_u8): Likewise. (__arm_vld1q_z_u8): Likewise. (__arm_vld1q_z_s8): Likewise. (__arm_vld2q_s8): Likewise. (__arm_vld2q_u8): Likewise. (__arm_vld4q_s8): Likewise. (__arm_vld4q_u8): Likewise. (__arm_vst1q_p_u16): Likewise. (__arm_vst1q_p_s16): Likewise. (__arm_vst2q_s16): Likewise. (__arm_vst2q_u16): Likewise. (__arm_vld1q_z_u16): Likewise. (__arm_vld1q_z_s16): Likewise. (__arm_vld2q_s16): Likewise. (__arm_vld2q_u16): Likewise. (__arm_vld4q_s16): Likewise. (__arm_vld4q_u16): Likewise. (__arm_vst1q_p_u32): Likewise. (__arm_vst1q_p_s32): Likewise. (__arm_vst2q_s32): Likewise. (__arm_vst2q_u32): Likewise. (__arm_vld1q_z_u32): Likewise. (__arm_vld1q_z_s32): Likewise. (__arm_vld2q_s32): Likewise. (__arm_vld2q_u32): Likewise. (__arm_vld4q_s32): Likewise. (__arm_vld4q_u32): Likewise. (__arm_vld4q_f16): Likewise. (__arm_vld2q_f16): Likewise. (__arm_vld1q_z_f16): Likewise. (__arm_vst2q_f16): Likewise. (__arm_vst1q_p_f16): Likewise. (__arm_vld4q_f32): Likewise. (__arm_vld2q_f32): Likewise. (__arm_vld1q_z_f32): Likewise. (__arm_vst2q_f32): Likewise. (__arm_vst1q_p_f32): Likewise. (vld1q_z): Define polymorphic variant. (vld2q): Likewise. (vld4q): Likewise. (vst1q_p): Likewise. (vst2q): Likewise. * config/arm/arm_mve_builtins.def (STORE1): Use builtin qualifier. (LOAD1): Likewise. * config/arm/mve.md (mve_vst2q): Define RTL pattern. (mve_vld2q): Likewise. (mve_vld4q): Likewise. gcc/testsuite/ChangeLog: 2020-03-20 Srinath Parvathaneni Andre Vieira Mihail Ionescu * gcc.target/arm/mve/intrinsics/vld1q_z_f16.c: New test. * gcc.target/arm/mve/intrinsics/vld1q_z_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_z_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_z_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_z_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_z_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld1q_z_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld2q_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vld4q_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_p_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vst2q_u8.c: Likewise. --- gcc/ChangeLog | 95 ++++ gcc/config/arm/arm_mve.h | 482 +++++++++++++++++++++ gcc/config/arm/arm_mve_builtins.def | 3 + gcc/config/arm/mve.md | 90 +++- gcc/testsuite/ChangeLog | 45 ++ .../gcc.target/arm/mve/intrinsics/vld1q_z_f16.c | 22 + .../gcc.target/arm/mve/intrinsics/vld1q_z_f32.c | 22 + .../gcc.target/arm/mve/intrinsics/vld1q_z_s16.c | 22 + .../gcc.target/arm/mve/intrinsics/vld1q_z_s32.c | 22 + .../gcc.target/arm/mve/intrinsics/vld1q_z_s8.c | 22 + .../gcc.target/arm/mve/intrinsics/vld1q_z_u16.c | 22 + .../gcc.target/arm/mve/intrinsics/vld1q_z_u32.c | 22 + .../gcc.target/arm/mve/intrinsics/vld1q_z_u8.c | 22 + .../gcc.target/arm/mve/intrinsics/vld2q_f16.c | 23 + .../gcc.target/arm/mve/intrinsics/vld2q_f32.c | 23 + .../gcc.target/arm/mve/intrinsics/vld2q_s16.c | 23 + .../gcc.target/arm/mve/intrinsics/vld2q_s32.c | 23 + .../gcc.target/arm/mve/intrinsics/vld2q_s8.c | 23 + .../gcc.target/arm/mve/intrinsics/vld2q_u16.c | 23 + .../gcc.target/arm/mve/intrinsics/vld2q_u32.c | 23 + .../gcc.target/arm/mve/intrinsics/vld2q_u8.c | 23 + .../gcc.target/arm/mve/intrinsics/vld4q_f16.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vld4q_f32.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vld4q_s16.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vld4q_s32.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vld4q_s8.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vld4q_u16.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vld4q_u32.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vld4q_u8.c | 25 ++ .../gcc.target/arm/mve/intrinsics/vst1q_p_f16.c | 22 + .../gcc.target/arm/mve/intrinsics/vst1q_p_f32.c | 22 + .../gcc.target/arm/mve/intrinsics/vst1q_p_s16.c | 22 + .../gcc.target/arm/mve/intrinsics/vst1q_p_s32.c | 22 + .../gcc.target/arm/mve/intrinsics/vst1q_p_s8.c | 22 + .../gcc.target/arm/mve/intrinsics/vst1q_p_u16.c | 22 + .../gcc.target/arm/mve/intrinsics/vst1q_p_u32.c | 22 + .../gcc.target/arm/mve/intrinsics/vst1q_p_u8.c | 22 + .../gcc.target/arm/mve/intrinsics/vst2q_f16.c | 23 + .../gcc.target/arm/mve/intrinsics/vst2q_f32.c | 23 + .../gcc.target/arm/mve/intrinsics/vst2q_s16.c | 23 + .../gcc.target/arm/mve/intrinsics/vst2q_s32.c | 23 + .../gcc.target/arm/mve/intrinsics/vst2q_s8.c | 23 + .../gcc.target/arm/mve/intrinsics/vst2q_u16.c | 23 + .../gcc.target/arm/mve/intrinsics/vst2q_u32.c | 23 + .../gcc.target/arm/mve/intrinsics/vst2q_u8.c | 23 + 45 files changed, 1634 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s8.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u16.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u8.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 44888ca..9f50e43 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -2,6 +2,101 @@ Andre Vieira Mihail Ionescu + * config/arm/arm_mve.h (vst1q_p_u8): Define macro. + (vst1q_p_s8): Likewise. + (vst2q_s8): Likewise. + (vst2q_u8): Likewise. + (vld1q_z_u8): Likewise. + (vld1q_z_s8): Likewise. + (vld2q_s8): Likewise. + (vld2q_u8): Likewise. + (vld4q_s8): Likewise. + (vld4q_u8): Likewise. + (vst1q_p_u16): Likewise. + (vst1q_p_s16): Likewise. + (vst2q_s16): Likewise. + (vst2q_u16): Likewise. + (vld1q_z_u16): Likewise. + (vld1q_z_s16): Likewise. + (vld2q_s16): Likewise. + (vld2q_u16): Likewise. + (vld4q_s16): Likewise. + (vld4q_u16): Likewise. + (vst1q_p_u32): Likewise. + (vst1q_p_s32): Likewise. + (vst2q_s32): Likewise. + (vst2q_u32): Likewise. + (vld1q_z_u32): Likewise. + (vld1q_z_s32): Likewise. + (vld2q_s32): Likewise. + (vld2q_u32): Likewise. + (vld4q_s32): Likewise. + (vld4q_u32): Likewise. + (vld4q_f16): Likewise. + (vld2q_f16): Likewise. + (vld1q_z_f16): Likewise. + (vst2q_f16): Likewise. + (vst1q_p_f16): Likewise. + (vld4q_f32): Likewise. + (vld2q_f32): Likewise. + (vld1q_z_f32): Likewise. + (vst2q_f32): Likewise. + (vst1q_p_f32): Likewise. + (__arm_vst1q_p_u8): Define intrinsic. + (__arm_vst1q_p_s8): Likewise. + (__arm_vst2q_s8): Likewise. + (__arm_vst2q_u8): Likewise. + (__arm_vld1q_z_u8): Likewise. + (__arm_vld1q_z_s8): Likewise. + (__arm_vld2q_s8): Likewise. + (__arm_vld2q_u8): Likewise. + (__arm_vld4q_s8): Likewise. + (__arm_vld4q_u8): Likewise. + (__arm_vst1q_p_u16): Likewise. + (__arm_vst1q_p_s16): Likewise. + (__arm_vst2q_s16): Likewise. + (__arm_vst2q_u16): Likewise. + (__arm_vld1q_z_u16): Likewise. + (__arm_vld1q_z_s16): Likewise. + (__arm_vld2q_s16): Likewise. + (__arm_vld2q_u16): Likewise. + (__arm_vld4q_s16): Likewise. + (__arm_vld4q_u16): Likewise. + (__arm_vst1q_p_u32): Likewise. + (__arm_vst1q_p_s32): Likewise. + (__arm_vst2q_s32): Likewise. + (__arm_vst2q_u32): Likewise. + (__arm_vld1q_z_u32): Likewise. + (__arm_vld1q_z_s32): Likewise. + (__arm_vld2q_s32): Likewise. + (__arm_vld2q_u32): Likewise. + (__arm_vld4q_s32): Likewise. + (__arm_vld4q_u32): Likewise. + (__arm_vld4q_f16): Likewise. + (__arm_vld2q_f16): Likewise. + (__arm_vld1q_z_f16): Likewise. + (__arm_vst2q_f16): Likewise. + (__arm_vst1q_p_f16): Likewise. + (__arm_vld4q_f32): Likewise. + (__arm_vld2q_f32): Likewise. + (__arm_vld1q_z_f32): Likewise. + (__arm_vst2q_f32): Likewise. + (__arm_vst1q_p_f32): Likewise. + (vld1q_z): Define polymorphic variant. + (vld2q): Likewise. + (vld4q): Likewise. + (vst1q_p): Likewise. + (vst2q): Likewise. + * config/arm/arm_mve_builtins.def (STORE1): Use builtin qualifier. + (LOAD1): Likewise. + * config/arm/mve.md (mve_vst2q): Define RTL pattern. + (mve_vld2q): Likewise. + (mve_vld4q): Likewise. + +2020-03-20 Srinath Parvathaneni + Andre Vieira + Mihail Ionescu + * config/arm/arm-builtins.c (ARM_BUILTIN_GET_FPSCR_NZCVQC): Define. (ARM_BUILTIN_SET_FPSCR_NZCVQC): Likewise. (arm_init_mve_builtins): Add "__builtin_arm_get_fpscr_nzcvqc" and diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 220319c..f6810dd 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -2466,6 +2466,46 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define vsbcq_u32(__a, __b, __carry) __arm_vsbcq_u32(__a, __b, __carry) #define vsbcq_m_s32(__inactive, __a, __b, __carry, __p) __arm_vsbcq_m_s32(__inactive, __a, __b, __carry, __p) #define vsbcq_m_u32(__inactive, __a, __b, __carry, __p) __arm_vsbcq_m_u32(__inactive, __a, __b, __carry, __p) +#define vst1q_p_u8(__addr, __value, __p) __arm_vst1q_p_u8(__addr, __value, __p) +#define vst1q_p_s8(__addr, __value, __p) __arm_vst1q_p_s8(__addr, __value, __p) +#define vst2q_s8(__addr, __value) __arm_vst2q_s8(__addr, __value) +#define vst2q_u8(__addr, __value) __arm_vst2q_u8(__addr, __value) +#define vld1q_z_u8(__base, __p) __arm_vld1q_z_u8(__base, __p) +#define vld1q_z_s8(__base, __p) __arm_vld1q_z_s8(__base, __p) +#define vld2q_s8(__addr) __arm_vld2q_s8(__addr) +#define vld2q_u8(__addr) __arm_vld2q_u8(__addr) +#define vld4q_s8(__addr) __arm_vld4q_s8(__addr) +#define vld4q_u8(__addr) __arm_vld4q_u8(__addr) +#define vst1q_p_u16(__addr, __value, __p) __arm_vst1q_p_u16(__addr, __value, __p) +#define vst1q_p_s16(__addr, __value, __p) __arm_vst1q_p_s16(__addr, __value, __p) +#define vst2q_s16(__addr, __value) __arm_vst2q_s16(__addr, __value) +#define vst2q_u16(__addr, __value) __arm_vst2q_u16(__addr, __value) +#define vld1q_z_u16(__base, __p) __arm_vld1q_z_u16(__base, __p) +#define vld1q_z_s16(__base, __p) __arm_vld1q_z_s16(__base, __p) +#define vld2q_s16(__addr) __arm_vld2q_s16(__addr) +#define vld2q_u16(__addr) __arm_vld2q_u16(__addr) +#define vld4q_s16(__addr) __arm_vld4q_s16(__addr) +#define vld4q_u16(__addr) __arm_vld4q_u16(__addr) +#define vst1q_p_u32(__addr, __value, __p) __arm_vst1q_p_u32(__addr, __value, __p) +#define vst1q_p_s32(__addr, __value, __p) __arm_vst1q_p_s32(__addr, __value, __p) +#define vst2q_s32(__addr, __value) __arm_vst2q_s32(__addr, __value) +#define vst2q_u32(__addr, __value) __arm_vst2q_u32(__addr, __value) +#define vld1q_z_u32(__base, __p) __arm_vld1q_z_u32(__base, __p) +#define vld1q_z_s32(__base, __p) __arm_vld1q_z_s32(__base, __p) +#define vld2q_s32(__addr) __arm_vld2q_s32(__addr) +#define vld2q_u32(__addr) __arm_vld2q_u32(__addr) +#define vld4q_s32(__addr) __arm_vld4q_s32(__addr) +#define vld4q_u32(__addr) __arm_vld4q_u32(__addr) +#define vld4q_f16(__addr) __arm_vld4q_f16(__addr) +#define vld2q_f16(__addr) __arm_vld2q_f16(__addr) +#define vld1q_z_f16(__base, __p) __arm_vld1q_z_f16(__base, __p) +#define vst2q_f16(__addr, __value) __arm_vst2q_f16(__addr, __value) +#define vst1q_p_f16(__addr, __value, __p) __arm_vst1q_p_f16(__addr, __value, __p) +#define vld4q_f32(__addr) __arm_vld4q_f32(__addr) +#define vld2q_f32(__addr) __arm_vld2q_f32(__addr) +#define vld1q_z_f32(__base, __p) __arm_vld1q_z_f32(__base, __p) +#define vst2q_f32(__addr, __value) __arm_vst2q_f32(__addr, __value) +#define vst1q_p_f32(__addr, __value, __p) __arm_vst1q_p_f32(__addr, __value, __p) #endif __extension__ extern __inline void @@ -16085,6 +16125,252 @@ __arm_vsbcq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsign return __res; } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_u8 (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p) +{ + return vstrbq_p_u8 (__addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_s8 (int8_t * __addr, int8x16_t __value, mve_pred16_t __p) +{ + return vstrbq_p_s8 (__addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_s8 (int8_t * __addr, int8x16x2_t __value) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv16qi ((__builtin_neon_qi *) __addr, __rv.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_u8 (uint8_t * __addr, uint8x16x2_t __value) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv16qi ((__builtin_neon_qi *) __addr, __rv.__o); +} + +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_u8 (uint8_t const *__base, mve_pred16_t __p) +{ + return vldrbq_z_u8 ( __base, __p); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_s8 (int8_t const *__base, mve_pred16_t __p) +{ + return vldrbq_z_s8 ( __base, __p); +} + +__extension__ extern __inline int8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_s8 (int8_t const * __addr) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv16qi ((__builtin_neon_qi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline uint8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_u8 (uint8_t const * __addr) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv16qi ((__builtin_neon_qi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_s8 (int8_t const * __addr) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv16qi ((__builtin_neon_qi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_u8 (uint8_t const * __addr) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv16qi ((__builtin_neon_qi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_u16 (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p) +{ + return vstrhq_p_u16 (__addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_s16 (int16_t * __addr, int16x8_t __value, mve_pred16_t __p) +{ + return vstrhq_p_s16 (__addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_s16 (int16_t * __addr, int16x8x2_t __value) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv8hi ((__builtin_neon_hi *) __addr, __rv.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_u16 (uint16_t * __addr, uint16x8x2_t __value) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv8hi ((__builtin_neon_hi *) __addr, __rv.__o); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_u16 (uint16_t const *__base, mve_pred16_t __p) +{ + return vldrhq_z_u16 ( __base, __p); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_s16 (int16_t const *__base, mve_pred16_t __p) +{ + return vldrhq_z_s16 ( __base, __p); +} + +__extension__ extern __inline int16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_s16 (int16_t const * __addr) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv8hi ((__builtin_neon_hi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline uint16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_u16 (uint16_t const * __addr) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv8hi ((__builtin_neon_hi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_s16 (int16_t const * __addr) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv8hi ((__builtin_neon_hi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_u16 (uint16_t const * __addr) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv8hi ((__builtin_neon_hi *) __addr); + return __rv.__i; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_u32 (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p) +{ + return vstrwq_p_u32 (__addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_s32 (int32_t * __addr, int32x4_t __value, mve_pred16_t __p) +{ + return vstrwq_p_s32 (__addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_s32 (int32_t * __addr, int32x4x2_t __value) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv4si ((__builtin_neon_si *) __addr, __rv.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_u32 (uint32_t * __addr, uint32x4x2_t __value) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv4si ((__builtin_neon_si *) __addr, __rv.__o); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_u32 (uint32_t const *__base, mve_pred16_t __p) +{ + return vldrwq_z_u32 ( __base, __p); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_s32 (int32_t const *__base, mve_pred16_t __p) +{ + return vldrwq_z_s32 ( __base, __p); +} + +__extension__ extern __inline int32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_s32 (int32_t const * __addr) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv4si ((__builtin_neon_si *) __addr); + return __rv.__i; +} + +__extension__ extern __inline uint32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_u32 (uint32_t const * __addr) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv4si ((__builtin_neon_si *) __addr); + return __rv.__i; +} + +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_s32 (int32_t const * __addr) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv4si ((__builtin_neon_si *) __addr); + return __rv.__i; +} + +__extension__ extern __inline uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_u32 (uint32_t const * __addr) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv4si ((__builtin_neon_si *) __addr); + return __rv.__i; +} + #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void @@ -19436,6 +19722,88 @@ __arm_vrev64q_x_f32 (float32x4_t __a, mve_pred16_t __p) return __builtin_mve_vrev64q_m_fv4sf (vuninitializedq_f32 (), __a, __p); } +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_f16 (float16_t const * __addr) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv8hf (__addr); + return __rv.__i; +} + +__extension__ extern __inline float16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_f16 (float16_t const * __addr) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv8hf (__addr); + return __rv.__i; +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_f16 (float16_t const *__base, mve_pred16_t __p) +{ + return vldrhq_z_f16 ( __base, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_f16 (float16_t * __addr, float16x8x2_t __value) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv8hf (__addr, __rv.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_f16 (float16_t * __addr, float16x8_t __value, mve_pred16_t __p) +{ + return vstrhq_p_f16 (__addr, __value, __p); +} + +__extension__ extern __inline float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld4q_f32 (float32_t const * __addr) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_mve_vld4qv4sf (__addr); + return __rv.__i; +} + +__extension__ extern __inline float32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld2q_f32 (float32_t const * __addr) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_mve_vld2qv4sf (__addr); + return __rv.__i; +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vld1q_z_f32 (float32_t const *__base, mve_pred16_t __p) +{ + return vldrwq_z_f32 ( __base, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst2q_f32 (float32_t * __addr, float32x4x2_t __value) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__i = __value; + __builtin_mve_vst2qv4sf (__addr, __rv.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vst1q_p_f32 (float32_t * __addr, float32x4_t __value, mve_pred16_t __p) +{ + return vstrwq_p_f32 (__addr, __value, __p); +} + #endif enum { @@ -21911,6 +22279,42 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16_t_const_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce(__p0, float16_t const *)), \ int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce(__p0, float32_t const *)));}) +#define vld1q_z(p0,p1) __arm_vld1q_z(p0, p1) +#define __arm_vld1q_z(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8_t_const_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(__p0, int8_t const *), p1), \ + int (*)[__ARM_mve_type_int16_t_const_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(__p0, int16_t const *), p1), \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1), \ + int (*)[__ARM_mve_type_uint8_t_const_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(__p0, uint8_t const *), p1), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(__p0, uint16_t const *), p1), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1), \ + int (*)[__ARM_mve_type_float16_t_const_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce(__p0, float16_t const *), p1), \ + int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce(__p0, float32_t const *), p1));}) + +#define vld2q(p0) __arm_vld2q(p0) +#define __arm_vld2q(p0) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8_t_const_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(__p0, int8_t const *)), \ + int (*)[__ARM_mve_type_int16_t_const_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(__p0, int16_t const *)), \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(__p0, int32_t const *)), \ + int (*)[__ARM_mve_type_uint8_t_const_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(__p0, uint8_t const *)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(__p0, uint16_t const *)), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(__p0, uint32_t const *)), \ + int (*)[__ARM_mve_type_float16_t_const_ptr]: __arm_vld2q_f16 (__ARM_mve_coerce(__p0, float16_t const *)), \ + int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vld2q_f32 (__ARM_mve_coerce(__p0, float32_t const *)));}) + +#define vld4q(p0) __arm_vld4q(p0) +#define __arm_vld4q(p0) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8_t_const_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(__p0, int8_t const *)), \ + int (*)[__ARM_mve_type_int16_t_const_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(__p0, int16_t const *)), \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(__p0, int32_t const *)), \ + int (*)[__ARM_mve_type_uint8_t_const_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(__p0, uint8_t const *)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(__p0, uint16_t const *)), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(__p0, uint32_t const *)), \ + int (*)[__ARM_mve_type_float16_t_const_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce(__p0, float16_t const *)), \ + int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce(__p0, float32_t const *)));}) + #define vldrhq_gather_offset(p0,p1) __arm_vldrhq_gather_offset(p0,p1) #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -21979,6 +22383,32 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1, p2), \ int (*)[__ARM_mve_type_float32_t_const_ptr]: __arm_vldrwq_gather_shifted_offset_z_f32 (__ARM_mve_coerce(__p0, float32_t const *), p1, p2));}) +#define vst1q_p(p0,p1,p2) __arm_vst1q_p(p0,p1,p2) +#define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vst1q_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vst1q_p_s16 (__ARM_mve_coerce(__p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vst1q_p_s32 (__ARM_mve_coerce(__p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vst1q_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ + int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vst1q_p_u16 (__ARM_mve_coerce(__p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vst1q_p_u32 (__ARM_mve_coerce(__p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ + int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]: __arm_vst1q_p_f16 (__ARM_mve_coerce(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vst1q_p_f32 (__ARM_mve_coerce(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + +#define vst2q(p0,p1) __arm_vst2q(p0,p1) +#define __arm_vst2q(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x2_t]: __arm_vst2q_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16x2_t)), \ + int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8x2_t]: __arm_vst2q_s16 (__ARM_mve_coerce(__p0, int16_t *), __ARM_mve_coerce(__p1, int16x8x2_t)), \ + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4x2_t]: __arm_vst2q_s32 (__ARM_mve_coerce(__p0, int32_t *), __ARM_mve_coerce(__p1, int32x4x2_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16x2_t]: __arm_vst2q_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16x2_t)), \ + int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x2_t]: __arm_vst2q_u16 (__ARM_mve_coerce(__p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x2_t)), \ + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x2_t]: __arm_vst2q_u32 (__ARM_mve_coerce(__p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x2_t)), \ + int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x2_t]: __arm_vst2q_f16 (__ARM_mve_coerce(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x2_t)), \ + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x2_t]: __arm_vst2q_f32 (__ARM_mve_coerce(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x2_t)));}) + #define vst1q(p0,p1) __arm_vst1q(p0,p1) #define __arm_vst1q(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -24849,6 +25279,28 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vst1q_u16 (__ARM_mve_coerce(__p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vst1q_u32 (__ARM_mve_coerce(__p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) +#define vst1q_p(p0,p1,p2) __arm_vst1q_p(p0,p1,p2) +#define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vst1q_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vst1q_p_s16 (__ARM_mve_coerce(__p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vst1q_p_s32 (__ARM_mve_coerce(__p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vst1q_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ + int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vst1q_p_u16 (__ARM_mve_coerce(__p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vst1q_p_u32 (__ARM_mve_coerce(__p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) + +#define vst2q(p0,p1) __arm_vst2q(p0,p1) +#define __arm_vst2q(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x2_t]: __arm_vst2q_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16x2_t)), \ + int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8x2_t]: __arm_vst2q_s16 (__ARM_mve_coerce(__p0, int16_t *), __ARM_mve_coerce(__p1, int16x8x2_t)), \ + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4x2_t]: __arm_vst2q_s32 (__ARM_mve_coerce(__p0, int32_t *), __ARM_mve_coerce(__p1, int32x4x2_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16x2_t]: __arm_vst2q_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16x2_t)), \ + int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x2_t]: __arm_vst2q_u16 (__ARM_mve_coerce(__p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x2_t)), \ + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x2_t]: __arm_vst2q_u32 (__ARM_mve_coerce(__p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x2_t)));}) + #define vstrhq(p0,p1) __arm_vstrhq(p0,p1) #define __arm_vstrhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -25403,6 +25855,36 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));}) +#define vld1q_z(p0,p1) __arm_vld1q_z(p0, p1) +#define __arm_vld1q_z(p0,p1) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8_t_const_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(__p0, int8_t const *), p1), \ + int (*)[__ARM_mve_type_int16_t_const_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(__p0, int16_t const *), p1), \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(__p0, int32_t const *), p1), \ + int (*)[__ARM_mve_type_uint8_t_const_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(__p0, uint8_t const *), p1), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(__p0, uint16_t const *), p1), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(__p0, uint32_t const *), p1));}) + +#define vld2q(p0) __arm_vld2q(p0) +#define __arm_vld2q(p0) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8_t_const_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(__p0, int8_t const *)), \ + int (*)[__ARM_mve_type_int16_t_const_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(__p0, int16_t const *)), \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(__p0, int32_t const *)), \ + int (*)[__ARM_mve_type_uint8_t_const_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(__p0, uint8_t const *)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(__p0, uint16_t const *)), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(__p0, uint32_t const *)));}) + +#define vld4q(p0) __arm_vld4q(p0) +#define __arm_vld4q(p0) ({ __typeof(p0) __p0 = (p0); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ + int (*)[__ARM_mve_type_int8_t_const_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(__p0, int8_t const *)), \ + int (*)[__ARM_mve_type_int16_t_const_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(__p0, int16_t const *)), \ + int (*)[__ARM_mve_type_int32_t_const_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(__p0, int32_t const *)), \ + int (*)[__ARM_mve_type_uint8_t_const_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(__p0, uint8_t const *)), \ + int (*)[__ARM_mve_type_uint16_t_const_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(__p0, uint16_t const *)), \ + int (*)[__ARM_mve_type_uint32_t_const_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(__p0, uint32_t const *)));}) + #endif /* MVE Integer. */ #define vmvnq_x(p1,p2) __arm_vmvnq_x(p1,p2) diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 38f46be..a60650c 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -873,3 +873,6 @@ VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsbciq_m_s, v4si) VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsbciq_m_u, v4si) VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsbcq_m_s, v4si) VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsbcq_m_u, v4si) +VAR5 (STORE1, vst2q, v16qi, v8hi, v4si, v8hf, v4sf) +VAR5 (LOAD1, vld4q, v16qi, v8hi, v4si, v8hf, v4sf) +VAR5 (LOAD1, vld2q, v16qi, v8hi, v4si, v8hf, v4sf) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 25b5973..2e28d9d 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -214,7 +214,7 @@ VLDRDQGBWB_S VLDRDQGBWB_U VADCQ_U VADCQ_M_U VADCQ_S VADCQ_M_S VSBCIQ_U VSBCIQ_S VSBCIQ_M_U VSBCIQ_M_S VSBCQ_U VSBCQ_S VSBCQ_M_U VSBCQ_M_S VADCIQ_U VADCIQ_M_U - VADCIQ_S VADCIQ_M_S]) + VADCIQ_S VADCIQ_M_S VLD2Q VLD4Q VST2Q]) (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") (V4SF "V4SI")]) @@ -10797,3 +10797,91 @@ "vsbc.i32\t%q0, %q1, %q2" [(set_attr "type" "mve_move") (set_attr "length" "4")]) + +;; +;; [vst2q]) +;; +(define_insn "mve_vst2q" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + VST2Q)) + ] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (mode))" +{ + rtx ops[4]; + int regno = REGNO (operands[1]); + ops[0] = gen_rtx_REG (TImode, regno); + ops[1] = gen_rtx_REG (TImode, regno + 4); + rtx reg = operands[0]; + while (reg && !REG_P (reg)) + reg = XEXP (reg, 0); + gcc_assert (REG_P (reg)); + ops[2] = reg; + ops[3] = operands[0]; + output_asm_insn ("vst20.\t{%q0, %q1}, [%2]\n\t" + "vst21.\t{%q0, %q1}, %3", ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vld2q]) +;; +(define_insn "mve_vld2q" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + VLD2Q)) + ] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (mode))" +{ + rtx ops[4]; + int regno = REGNO (operands[0]); + ops[0] = gen_rtx_REG (TImode, regno); + ops[1] = gen_rtx_REG (TImode, regno + 4); + rtx reg = operands[1]; + while (reg && !REG_P (reg)) + reg = XEXP (reg, 0); + gcc_assert (REG_P (reg)); + ops[2] = reg; + ops[3] = operands[1]; + output_asm_insn ("vld20.\t{%q0, %q1}, [%2]\n\t" + "vld21.\t{%q0, %q1}, %3", ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vld4q]) +;; +(define_insn "mve_vld4q" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand:XI 1 "neon_struct_operand" "Um") + (unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + VLD4Q)) + ] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (mode))" +{ + rtx ops[6]; + int regno = REGNO (operands[0]); + ops[0] = gen_rtx_REG (TImode, regno); + ops[1] = gen_rtx_REG (TImode, regno+4); + ops[2] = gen_rtx_REG (TImode, regno+8); + ops[3] = gen_rtx_REG (TImode, regno + 12); + rtx reg = operands[1]; + while (reg && !REG_P (reg)) + reg = XEXP (reg, 0); + gcc_assert (REG_P (reg)); + ops[4] = reg; + ops[5] = operands[1]; + output_asm_insn ("vld40.\t{%q0, %q1, %q2, %q3}, [%4]\n\t" + "vld41.\t{%q0, %q1, %q2, %q3}, [%4]\n\t" + "vld42.\t{%q0, %q1, %q2, %q3}, [%4]\n\t" + "vld43.\t{%q0, %q1, %q2, %q3}, %5", ops); + return ""; +} + [(set_attr "length" "16")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2408590..c9fff88 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,48 @@ +2020-03-20 Srinath Parvathaneni + Andre Vieira + Mihail Ionescu + + * gcc.target/arm/mve/intrinsics/vld1q_z_f16.c: New test. + * gcc.target/arm/mve/intrinsics/vld1q_z_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_z_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_z_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_z_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_z_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_z_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld1q_z_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld2q_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vld4q_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst1q_p_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vst2q_u8.c: Likewise. + 2020-03-20 Richard Sandiford PR middle-end/94072 diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f16.c new file mode 100644 index 0000000..830d817 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16_t const * base, mve_pred16_t p) +{ + return vld1q_z_f16 (base, p); +} + +/* { dg-final { scan-assembler "vldrht.f16" } } */ + +float16x8_t +foo1 (float16_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrht.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f32.c new file mode 100644 index 0000000..84f976a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_f32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32_t const * base, mve_pred16_t p) +{ + return vld1q_z_f32 (base, p); +} + +/* { dg-final { scan-assembler "vldrwt.f32" } } */ + +float32x4_t +foo1 (float32_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrwt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s16.c new file mode 100644 index 0000000..8bb7ef3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int16x8_t +foo (int16_t const * base, mve_pred16_t p) +{ + return vld1q_z_s16 (base, p); +} + +/* { dg-final { scan-assembler "vldrht.s16" } } */ + +int16x8_t +foo1 (int16_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrht.s16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s32.c new file mode 100644 index 0000000..f5d7cc0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4_t +foo (int32_t const * base, mve_pred16_t p) +{ + return vld1q_z_s32 (base, p); +} + +/* { dg-final { scan-assembler "vldrwt.s32" } } */ + +int32x4_t +foo1 (int32_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrwt.s32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s8.c new file mode 100644 index 0000000..a3999e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_s8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int8x16_t +foo (int8_t const * base, mve_pred16_t p) +{ + return vld1q_z_s8 (base, p); +} + +/* { dg-final { scan-assembler "vldrbt.s8" } } */ + +int8x16_t +foo1 (int8_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrbt.s8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u16.c new file mode 100644 index 0000000..ada9c2f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint16x8_t +foo (uint16_t const * base, mve_pred16_t p) +{ + return vld1q_z_u16 (base, p); +} + +/* { dg-final { scan-assembler "vldrht.u16" } } */ + +uint16x8_t +foo1 (uint16_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrht.u16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u32.c new file mode 100644 index 0000000..c96be7b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32_t const * base, mve_pred16_t p) +{ + return vld1q_z_u32 (base, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ + +uint32x4_t +foo1 (uint32_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u8.c new file mode 100644 index 0000000..faca38d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld1q_z_u8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint8x16_t +foo (uint8_t const * base, mve_pred16_t p) +{ + return vld1q_z_u8 (base, p); +} + +/* { dg-final { scan-assembler "vldrbt.u8" } } */ + +uint8x16_t +foo1 (uint8_t const * base, mve_pred16_t p) +{ + return vld1q_z (base, p); +} + +/* { dg-final { scan-assembler "vldrbt.u8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f16.c new file mode 100644 index 0000000..cb2bc6f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f16.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8x2_t +foo (float16_t const * addr) +{ + return vld2q_f16 (addr); +} + +/* { dg-final { scan-assembler "vld20.16" } } */ +/* { dg-final { scan-assembler "vld21.16" } } */ + +float16x8x2_t +foo1 (float16_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f32.c new file mode 100644 index 0000000..f701d3d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_f32.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4x2_t +foo (float32_t const * addr) +{ + return vld2q_f32 (addr); +} + +/* { dg-final { scan-assembler "vld20.32" } } */ +/* { dg-final { scan-assembler "vld21.32" } } */ + +float32x4x2_t +foo1 (float32_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s16.c new file mode 100644 index 0000000..85e844c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s16.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int16x8x2_t +foo (int16_t const * addr) +{ + return vld2q_s16 (addr); +} + +/* { dg-final { scan-assembler "vld20.16" } } */ +/* { dg-final { scan-assembler "vld21.16" } } */ + +int16x8x2_t +foo1 (int16_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s32.c new file mode 100644 index 0000000..f46a9d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s32.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4x2_t +foo (int32_t const * addr) +{ + return vld2q_s32 (addr); +} + +/* { dg-final { scan-assembler "vld20.32" } } */ +/* { dg-final { scan-assembler "vld21.32" } } */ + +int32x4x2_t +foo1 (int32_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s8.c new file mode 100644 index 0000000..29dc288 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_s8.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int8x16x2_t +foo (int8_t const * addr) +{ + return vld2q_s8 (addr); +} + +/* { dg-final { scan-assembler "vld20.8" } } */ +/* { dg-final { scan-assembler "vld21.8" } } */ + +int8x16x2_t +foo1 (int8_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u16.c new file mode 100644 index 0000000..7d867b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u16.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint16x8x2_t +foo (uint16_t const * addr) +{ + return vld2q_u16 (addr); +} + +/* { dg-final { scan-assembler "vld20.16" } } */ +/* { dg-final { scan-assembler "vld21.16" } } */ + +uint16x8x2_t +foo1 (uint16_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u32.c new file mode 100644 index 0000000..6c9d12e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u32.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4x2_t +foo (uint32_t const * addr) +{ + return vld2q_u32 (addr); +} + +/* { dg-final { scan-assembler "vld20.32" } } */ +/* { dg-final { scan-assembler "vld21.32" } } */ + +uint32x4x2_t +foo1 (uint32_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u8.c new file mode 100644 index 0000000..002a645 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld2q_u8.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint8x16x2_t +foo (uint8_t const * addr) +{ + return vld2q_u8 (addr); +} + +/* { dg-final { scan-assembler "vld20.8" } } */ +/* { dg-final { scan-assembler "vld21.8" } } */ + +uint8x16x2_t +foo1 (uint8_t const * addr) +{ + return vld2q (addr); +} + +/* { dg-final { scan-assembler "vld20.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f16.c new file mode 100644 index 0000000..386b71b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f16.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8x4_t +foo (float16_t const * addr) +{ + return vld4q_f16 (addr); +} + +/* { dg-final { scan-assembler "vld40.16" } } */ +/* { dg-final { scan-assembler "vld41.16" } } */ +/* { dg-final { scan-assembler "vld42.16" } } */ +/* { dg-final { scan-assembler "vld43.16" } } */ + +float16x8x4_t +foo1 (float16_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f32.c new file mode 100644 index 0000000..c38bb54 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_f32.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4x4_t +foo (float32_t const * addr) +{ + return vld4q_f32 (addr); +} + +/* { dg-final { scan-assembler "vld40.32" } } */ +/* { dg-final { scan-assembler "vld41.32" } } */ +/* { dg-final { scan-assembler "vld42.32" } } */ +/* { dg-final { scan-assembler "vld43.32" } } */ + +float32x4x4_t +foo1 (float32_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s16.c new file mode 100644 index 0000000..68e6b98 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s16.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int16x8x4_t +foo (int16_t const * addr) +{ + return vld4q_s16 (addr); +} + +/* { dg-final { scan-assembler "vld40.16" } } */ +/* { dg-final { scan-assembler "vld41.16" } } */ +/* { dg-final { scan-assembler "vld42.16" } } */ +/* { dg-final { scan-assembler "vld43.16" } } */ + +int16x8x4_t +foo1 (int16_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s32.c new file mode 100644 index 0000000..db0ba20 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s32.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4x4_t +foo (int32_t const * addr) +{ + return vld4q_s32 (addr); +} + +/* { dg-final { scan-assembler "vld40.32" } } */ +/* { dg-final { scan-assembler "vld41.32" } } */ +/* { dg-final { scan-assembler "vld42.32" } } */ +/* { dg-final { scan-assembler "vld43.32" } } */ + +int32x4x4_t +foo1 (int32_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s8.c new file mode 100644 index 0000000..e38bdea --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_s8.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int8x16x4_t +foo (int8_t const * addr) +{ + return vld4q_s8 (addr); +} + +/* { dg-final { scan-assembler "vld40.8" } } */ +/* { dg-final { scan-assembler "vld41.8" } } */ +/* { dg-final { scan-assembler "vld42.8" } } */ +/* { dg-final { scan-assembler "vld43.8" } } */ + +int8x16x4_t +foo1 (int8_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u16.c new file mode 100644 index 0000000..7f6a783 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u16.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint16x8x4_t +foo (uint16_t const * addr) +{ + return vld4q_u16 (addr); +} + +/* { dg-final { scan-assembler "vld40.16" } } */ +/* { dg-final { scan-assembler "vld41.16" } } */ +/* { dg-final { scan-assembler "vld42.16" } } */ +/* { dg-final { scan-assembler "vld43.16" } } */ + +uint16x8x4_t +foo1 (uint16_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u32.c new file mode 100644 index 0000000..29af573 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u32.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4x4_t +foo (uint32_t const * addr) +{ + return vld4q_u32 (addr); +} + +/* { dg-final { scan-assembler "vld40.32" } } */ +/* { dg-final { scan-assembler "vld41.32" } } */ +/* { dg-final { scan-assembler "vld42.32" } } */ +/* { dg-final { scan-assembler "vld43.32" } } */ + +uint32x4x4_t +foo1 (uint32_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u8.c new file mode 100644 index 0000000..f540362 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vld4q_u8.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint8x16x4_t +foo (uint8_t const * addr) +{ + return vld4q_u8 (addr); +} + +/* { dg-final { scan-assembler "vld40.8" } } */ +/* { dg-final { scan-assembler "vld41.8" } } */ +/* { dg-final { scan-assembler "vld42.8" } } */ +/* { dg-final { scan-assembler "vld43.8" } } */ + +uint8x16x4_t +foo1 (uint8_t const * addr) +{ + return vld4q (addr); +} + +/* { dg-final { scan-assembler "vld40.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f16.c new file mode 100644 index 0000000..7ef5cce --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (float16_t * addr, float16x8_t value, mve_pred16_t p) +{ + vst1q_p_f16 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrht.16" } } */ + +void +foo1 (float16_t * addr, float16x8_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrht.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f32.c new file mode 100644 index 0000000..2cd7221 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_f32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (float32_t * addr, float32x4_t value, mve_pred16_t p) +{ + vst1q_p_f32 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.32" } } */ + +void +foo1 (float32_t * addr, float32x4_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s16.c new file mode 100644 index 0000000..ca56f73 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int16_t * addr, int16x8_t value, mve_pred16_t p) +{ + vst1q_p_s16 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrht.16" } } */ + +void +foo1 (int16_t * addr, int16x8_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrht.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s32.c new file mode 100644 index 0000000..782496f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int32_t * addr, int32x4_t value, mve_pred16_t p) +{ + vst1q_p_s32 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.32" } } */ + +void +foo1 (int32_t * addr, int32x4_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s8.c new file mode 100644 index 0000000..92bbc0a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_s8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * addr, int8x16_t value, mve_pred16_t p) +{ + vst1q_p_s8 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ + +void +foo1 (int8_t * addr, int8x16_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u16.c new file mode 100644 index 0000000..12c50f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint16_t * addr, uint16x8_t value, mve_pred16_t p) +{ + vst1q_p_u16 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrht.16" } } */ + +void +foo1 (uint16_t * addr, uint16x8_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrht.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u32.c new file mode 100644 index 0000000..2f7ef61 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32_t * addr, uint32x4_t value, mve_pred16_t p) +{ + vst1q_p_u32 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.32" } } */ + +void +foo1 (uint32_t * addr, uint32x4_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u8.c new file mode 100644 index 0000000..56fde60 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_p_u8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint8_t * addr, uint8x16_t value, mve_pred16_t p) +{ + vst1q_p_u8 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ + +void +foo1 (uint8_t * addr, uint8x16_t value, mve_pred16_t p) +{ + vst1q_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f16.c new file mode 100644 index 0000000..79e1b5c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f16.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (float16_t * addr, float16x8x2_t value) +{ + vst2q_f16 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.16" } } */ +/* { dg-final { scan-assembler "vst21.16" } } */ + +void +foo1 (float16_t * addr, float16x8x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f32.c new file mode 100644 index 0000000..7d256aa --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_f32.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (float32_t * addr, float32x4x2_t value) +{ + vst2q_f32 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.32" } } */ +/* { dg-final { scan-assembler "vst21.32" } } */ + +void +foo1 (float32_t * addr, float32x4x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s16.c new file mode 100644 index 0000000..f2fd867 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s16.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int16_t * addr, int16x8x2_t value) +{ + vst2q_s16 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.16" } } */ +/* { dg-final { scan-assembler "vst21.16" } } */ + +void +foo1 (int16_t * addr, int16x8x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s32.c new file mode 100644 index 0000000..85e36df --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s32.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int32_t * addr, int32x4x2_t value) +{ + vst2q_s32 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.32" } } */ +/* { dg-final { scan-assembler "vst21.32" } } */ + +void +foo1 (int32_t * addr, int32x4x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s8.c new file mode 100644 index 0000000..57e9efc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_s8.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * addr, int8x16x2_t value) +{ + vst2q_s8 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.8" } } */ +/* { dg-final { scan-assembler "vst21.8" } } */ + +void +foo1 (int8_t * addr, int8x16x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u16.c new file mode 100644 index 0000000..b54c791 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u16.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint16_t * addr, uint16x8x2_t value) +{ + vst2q_u16 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.16" } } */ +/* { dg-final { scan-assembler "vst21.16" } } */ + +void +foo1 (uint16_t * addr, uint16x8x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u32.c new file mode 100644 index 0000000..167f8bd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u32.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32_t * addr, uint32x4x2_t value) +{ + vst2q_u32 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.32" } } */ +/* { dg-final { scan-assembler "vst21.32" } } */ + +void +foo1 (uint32_t * addr, uint32x4x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u8.c new file mode 100644 index 0000000..9f7a5f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst2q_u8.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint8_t * addr, uint8x16x2_t value) +{ + vst2q_u8 (addr, value); +} + +/* { dg-final { scan-assembler "vst20.8" } } */ +/* { dg-final { scan-assembler "vst21.8" } } */ + +void +foo1 (uint8_t * addr, uint8x16x2_t value) +{ + vst2q (addr, value); +} + +/* { dg-final { scan-assembler "vst20.8" } } */ -- 2.7.4