From 41e1a7ffae9e1e03dcfcad30b8d92561b44eb97b Mon Sep 17 00:00:00 2001 From: Srinath Parvathaneni Date: Fri, 20 Mar 2020 12:06:26 +0000 Subject: [PATCH] [ARM][GCC][2/8x]: MVE ACLE gather load and scatter store intrinsics with writeback. This patch supports the following MVE ACLE intrinsics with writeback. vldrdq_gather_base_wb_s64, vldrdq_gather_base_wb_u64, vldrdq_gather_base_wb_z_s64, vldrdq_gather_base_wb_z_u64, vldrwq_gather_base_wb_f32, vldrwq_gather_base_wb_s32, vldrwq_gather_base_wb_u32, vldrwq_gather_base_wb_z_f32, vldrwq_gather_base_wb_z_s32, vldrwq_gather_base_wb_z_u32, vstrdq_scatter_base_wb_p_s64, vstrdq_scatter_base_wb_p_u64, vstrdq_scatter_base_wb_s64, vstrdq_scatter_base_wb_u64, vstrwq_scatter_base_wb_p_s32, vstrwq_scatter_base_wb_p_f32, vstrwq_scatter_base_wb_p_u32, vstrwq_scatter_base_wb_s32, vstrwq_scatter_base_wb_u32, vstrwq_scatter_base_wb_f32. Please refer to the M-profile Vector Extension (MVE) intrinsics [1] for more details. [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics 2020-03-20 Srinath Parvathaneni Andre Vieira Mihail Ionescu * config/arm/arm-builtins.c (LDRGBWBS_QUALIFIERS): Define builtin qualifier. (LDRGBWBU_QUALIFIERS): Likewise. (LDRGBWBS_Z_QUALIFIERS): Likewise. (LDRGBWBU_Z_QUALIFIERS): Likewise. (STRSBWBS_QUALIFIERS): Likewise. (STRSBWBU_QUALIFIERS): Likewise. (STRSBWBS_P_QUALIFIERS): Likewise. (STRSBWBU_P_QUALIFIERS): Likewise. * config/arm/arm_mve.h (vldrdq_gather_base_wb_s64): Define macro. (vldrdq_gather_base_wb_u64): Likewise. (vldrdq_gather_base_wb_z_s64): Likewise. (vldrdq_gather_base_wb_z_u64): Likewise. (vldrwq_gather_base_wb_f32): Likewise. (vldrwq_gather_base_wb_s32): Likewise. (vldrwq_gather_base_wb_u32): Likewise. (vldrwq_gather_base_wb_z_f32): Likewise. (vldrwq_gather_base_wb_z_s32): Likewise. (vldrwq_gather_base_wb_z_u32): Likewise. (vstrdq_scatter_base_wb_p_s64): Likewise. (vstrdq_scatter_base_wb_p_u64): Likewise. (vstrdq_scatter_base_wb_s64): Likewise. 
(vstrdq_scatter_base_wb_u64): Likewise. (vstrwq_scatter_base_wb_p_s32): Likewise. (vstrwq_scatter_base_wb_p_f32): Likewise. (vstrwq_scatter_base_wb_p_u32): Likewise. (vstrwq_scatter_base_wb_s32): Likewise. (vstrwq_scatter_base_wb_u32): Likewise. (vstrwq_scatter_base_wb_f32): Likewise. (__arm_vldrdq_gather_base_wb_s64): Define intrinsic. (__arm_vldrdq_gather_base_wb_u64): Likewise. (__arm_vldrdq_gather_base_wb_z_s64): Likewise. (__arm_vldrdq_gather_base_wb_z_u64): Likewise. (__arm_vldrwq_gather_base_wb_s32): Likewise. (__arm_vldrwq_gather_base_wb_u32): Likewise. (__arm_vldrwq_gather_base_wb_z_s32): Likewise. (__arm_vldrwq_gather_base_wb_z_u32): Likewise. (__arm_vstrdq_scatter_base_wb_s64): Likewise. (__arm_vstrdq_scatter_base_wb_u64): Likewise. (__arm_vstrdq_scatter_base_wb_p_s64): Likewise. (__arm_vstrdq_scatter_base_wb_p_u64): Likewise. (__arm_vstrwq_scatter_base_wb_p_s32): Likewise. (__arm_vstrwq_scatter_base_wb_p_u32): Likewise. (__arm_vstrwq_scatter_base_wb_s32): Likewise. (__arm_vstrwq_scatter_base_wb_u32): Likewise. (__arm_vldrwq_gather_base_wb_f32): Likewise. (__arm_vldrwq_gather_base_wb_z_f32): Likewise. (__arm_vstrwq_scatter_base_wb_f32): Likewise. (__arm_vstrwq_scatter_base_wb_p_f32): Likewise. (vstrwq_scatter_base_wb): Define polymorphic variant. (vstrwq_scatter_base_wb_p): Likewise. (vstrdq_scatter_base_wb_p): Likewise. (vstrdq_scatter_base_wb): Likewise. * config/arm/arm_mve_builtins.def (LDRGBWBS_QUALIFIERS): Use builtin qualifier. * config/arm/mve.md (mve_vstrwq_scatter_base_wb_v4si): Define RTL pattern. (mve_vstrwq_scatter_base_wb_add_v4si): Likewise. (mve_vstrwq_scatter_base_wb_v4si_insn): Likewise. (mve_vstrwq_scatter_base_wb_p_v4si): Likewise. (mve_vstrwq_scatter_base_wb_p_add_v4si): Likewise. (mve_vstrwq_scatter_base_wb_p_v4si_insn): Likewise. (mve_vstrwq_scatter_base_wb_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_add_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_fv4sf_insn): Likewise. (mve_vstrwq_scatter_base_wb_p_fv4sf): Likewise. 
(mve_vstrwq_scatter_base_wb_p_add_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_p_fv4sf_insn): Likewise. (mve_vstrdq_scatter_base_wb_v2di): Likewise. (mve_vstrdq_scatter_base_wb_add_v2di): Likewise. (mve_vstrdq_scatter_base_wb_v2di_insn): Likewise. (mve_vstrdq_scatter_base_wb_p_v2di): Likewise. (mve_vstrdq_scatter_base_wb_p_add_v2di): Likewise. (mve_vstrdq_scatter_base_wb_p_v2di_insn): Likewise. (mve_vldrwq_gather_base_wb_v4si): Likewise. (mve_vldrwq_gather_base_wb_v4si_insn): Likewise. (mve_vldrwq_gather_base_wb_z_v4si): Likewise. (mve_vldrwq_gather_base_wb_z_v4si_insn): Likewise. (mve_vldrwq_gather_base_wb_fv4sf): Likewise. (mve_vldrwq_gather_base_wb_fv4sf_insn): Likewise. (mve_vldrwq_gather_base_wb_z_fv4sf): Likewise. (mve_vldrwq_gather_base_wb_z_fv4sf_insn): Likewise. (mve_vldrdq_gather_base_wb_v2di): Likewise. (mve_vldrdq_gather_base_wb_v2di_insn): Likewise. (mve_vldrdq_gather_base_wb_z_v2di): Likewise. (mve_vldrdq_gather_base_wb_z_v2di_insn): Likewise. gcc/testsuite/ChangeLog: 2020-03-20 Srinath Parvathaneni Andre Vieira Mihail Ionescu * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: New test. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c: Likewise. --- gcc/ChangeLog | 91 ++++ gcc/config/arm/arm-builtins.c | 44 ++ gcc/config/arm/arm_mve.h | 238 +++++++++ gcc/config/arm/arm_mve_builtins.def | 30 ++ gcc/config/arm/mve.md | 583 ++++++++++++++++++++- gcc/testsuite/ChangeLog | 30 ++ .../arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c | 14 + .../arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c | 14 + .../mve/intrinsics/vldrdq_gather_base_wb_z_s64.c | 12 + .../mve/intrinsics/vldrdq_gather_base_wb_z_u64.c | 12 + .../arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c | 14 + .../arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c | 14 + .../arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c | 14 + .../mve/intrinsics/vldrwq_gather_base_wb_z_f32.c | 14 + .../mve/intrinsics/vldrwq_gather_base_wb_z_s32.c | 14 + .../mve/intrinsics/vldrwq_gather_base_wb_z_u32.c | 14 + .../mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c | 22 + .../mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c | 22 + .../mve/intrinsics/vstrdq_scatter_base_wb_s64.c | 22 + .../mve/intrinsics/vstrdq_scatter_base_wb_u64.c | 22 + .../mve/intrinsics/vstrwq_scatter_base_wb_f32.c | 22 + .../mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c | 22 + .../mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c | 22 + .../mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c | 22 + .../mve/intrinsics/vstrwq_scatter_base_wb_s32.c | 22 + .../mve/intrinsics/vstrwq_scatter_base_wb_u32.c | 22 + 26 files changed, 1370 insertions(+), 2 deletions(-) create mode 100644 
gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 22c9766..ab8d421 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -2,6 +2,97 @@ Andre Vieira Mihail Ionescu + * 
config/arm/arm-builtins.c (LDRGBWBS_QUALIFIERS): Define builtin + qualifier. + (LDRGBWBU_QUALIFIERS): Likewise. + (LDRGBWBS_Z_QUALIFIERS): Likewise. + (LDRGBWBU_Z_QUALIFIERS): Likewise. + (STRSBWBS_QUALIFIERS): Likewise. + (STRSBWBU_QUALIFIERS): Likewise. + (STRSBWBS_P_QUALIFIERS): Likewise. + (STRSBWBU_P_QUALIFIERS): Likewise. + * config/arm/arm_mve.h (vldrdq_gather_base_wb_s64): Define macro. + (vldrdq_gather_base_wb_u64): Likewise. + (vldrdq_gather_base_wb_z_s64): Likewise. + (vldrdq_gather_base_wb_z_u64): Likewise. + (vldrwq_gather_base_wb_f32): Likewise. + (vldrwq_gather_base_wb_s32): Likewise. + (vldrwq_gather_base_wb_u32): Likewise. + (vldrwq_gather_base_wb_z_f32): Likewise. + (vldrwq_gather_base_wb_z_s32): Likewise. + (vldrwq_gather_base_wb_z_u32): Likewise. + (vstrdq_scatter_base_wb_p_s64): Likewise. + (vstrdq_scatter_base_wb_p_u64): Likewise. + (vstrdq_scatter_base_wb_s64): Likewise. + (vstrdq_scatter_base_wb_u64): Likewise. + (vstrwq_scatter_base_wb_p_s32): Likewise. + (vstrwq_scatter_base_wb_p_f32): Likewise. + (vstrwq_scatter_base_wb_p_u32): Likewise. + (vstrwq_scatter_base_wb_s32): Likewise. + (vstrwq_scatter_base_wb_u32): Likewise. + (vstrwq_scatter_base_wb_f32): Likewise. + (__arm_vldrdq_gather_base_wb_s64): Define intrinsic. + (__arm_vldrdq_gather_base_wb_u64): Likewise. + (__arm_vldrdq_gather_base_wb_z_s64): Likewise. + (__arm_vldrdq_gather_base_wb_z_u64): Likewise. + (__arm_vldrwq_gather_base_wb_s32): Likewise. + (__arm_vldrwq_gather_base_wb_u32): Likewise. + (__arm_vldrwq_gather_base_wb_z_s32): Likewise. + (__arm_vldrwq_gather_base_wb_z_u32): Likewise. + (__arm_vstrdq_scatter_base_wb_s64): Likewise. + (__arm_vstrdq_scatter_base_wb_u64): Likewise. + (__arm_vstrdq_scatter_base_wb_p_s64): Likewise. + (__arm_vstrdq_scatter_base_wb_p_u64): Likewise. + (__arm_vstrwq_scatter_base_wb_p_s32): Likewise. + (__arm_vstrwq_scatter_base_wb_p_u32): Likewise. + (__arm_vstrwq_scatter_base_wb_s32): Likewise. + (__arm_vstrwq_scatter_base_wb_u32): Likewise. 
+ (__arm_vldrwq_gather_base_wb_f32): Likewise. + (__arm_vldrwq_gather_base_wb_z_f32): Likewise. + (__arm_vstrwq_scatter_base_wb_f32): Likewise. + (__arm_vstrwq_scatter_base_wb_p_f32): Likewise. + (vstrwq_scatter_base_wb): Define polymorphic variant. + (vstrwq_scatter_base_wb_p): Likewise. + (vstrdq_scatter_base_wb_p): Likewise. + (vstrdq_scatter_base_wb): Likewise. + * config/arm/arm_mve_builtins.def (LDRGBWBS_QUALIFIERS): Use builtin + qualifier. + * config/arm/mve.md (mve_vstrwq_scatter_base_wb_v4si): Define RTL + pattern. + (mve_vstrwq_scatter_base_wb_add_v4si): Likewise. + (mve_vstrwq_scatter_base_wb_v4si_insn): Likewise. + (mve_vstrwq_scatter_base_wb_p_v4si): Likewise. + (mve_vstrwq_scatter_base_wb_p_add_v4si): Likewise. + (mve_vstrwq_scatter_base_wb_p_v4si_insn): Likewise. + (mve_vstrwq_scatter_base_wb_fv4sf): Likewise. + (mve_vstrwq_scatter_base_wb_add_fv4sf): Likewise. + (mve_vstrwq_scatter_base_wb_fv4sf_insn): Likewise. + (mve_vstrwq_scatter_base_wb_p_fv4sf): Likewise. + (mve_vstrwq_scatter_base_wb_p_add_fv4sf): Likewise. + (mve_vstrwq_scatter_base_wb_p_fv4sf_insn): Likewise. + (mve_vstrdq_scatter_base_wb_v2di): Likewise. + (mve_vstrdq_scatter_base_wb_add_v2di): Likewise. + (mve_vstrdq_scatter_base_wb_v2di_insn): Likewise. + (mve_vstrdq_scatter_base_wb_p_v2di): Likewise. + (mve_vstrdq_scatter_base_wb_p_add_v2di): Likewise. + (mve_vstrdq_scatter_base_wb_p_v2di_insn): Likewise. + (mve_vldrwq_gather_base_wb_v4si): Likewise. + (mve_vldrwq_gather_base_wb_v4si_insn): Likewise. + (mve_vldrwq_gather_base_wb_z_v4si): Likewise. + (mve_vldrwq_gather_base_wb_z_v4si_insn): Likewise. + (mve_vldrwq_gather_base_wb_fv4sf): Likewise. + (mve_vldrwq_gather_base_wb_fv4sf_insn): Likewise. + (mve_vldrwq_gather_base_wb_z_fv4sf): Likewise. + (mve_vldrwq_gather_base_wb_z_fv4sf_insn): Likewise. + (mve_vldrdq_gather_base_wb_v2di): Likewise. + (mve_vldrdq_gather_base_wb_v2di_insn): Likewise. + (mve_vldrdq_gather_base_wb_z_v2di): Likewise. 
+ (mve_vldrdq_gather_base_wb_z_v2di_insn): Likewise. + +2020-03-20 Srinath Parvathaneni + Andre Vieira + Mihail Ionescu + * config/arm/arm-builtins.c (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE_QUALIFIERS): Define quinary builtin qualifier. diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index cefc144..ecdd95f 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -718,6 +718,50 @@ arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE_QUALIFIERS \ (arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers) +static enum arm_type_qualifiers +arm_ldrgbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_immediate}; +#define LDRGBWBS_QUALIFIERS (arm_ldrgbwbs_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; +#define LDRGBWBU_QUALIFIERS (arm_ldrgbwbu_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_immediate, + qualifier_unsigned}; +#define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, + qualifier_unsigned}; +#define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_none}; +#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_unsigned}; +#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers) + +static enum arm_type_qualifiers 
+arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, + qualifier_none, qualifier_unsigned}; +#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, + qualifier_unsigned, qualifier_unsigned}; +#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers) + /* End of Qualifier for MVE builtins. */ /* void ([T element type] *, T, immediate). */ diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 00f2242..969908b 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -2054,6 +2054,26 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define viwdupq_wb_u8( __a, __b, __imm) __arm_viwdupq_wb_u8( __a, __b, __imm) #define viwdupq_wb_u32( __a, __b, __imm) __arm_viwdupq_wb_u32( __a, __b, __imm) #define viwdupq_wb_u16( __a, __b, __imm) __arm_viwdupq_wb_u16( __a, __b, __imm) +#define vldrdq_gather_base_wb_s64(__addr, __offset) __arm_vldrdq_gather_base_wb_s64(__addr, __offset) +#define vldrdq_gather_base_wb_u64(__addr, __offset) __arm_vldrdq_gather_base_wb_u64(__addr, __offset) +#define vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) +#define vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) +#define vldrwq_gather_base_wb_f32(__addr, __offset) __arm_vldrwq_gather_base_wb_f32(__addr, __offset) +#define vldrwq_gather_base_wb_s32(__addr, __offset) __arm_vldrwq_gather_base_wb_s32(__addr, __offset) +#define vldrwq_gather_base_wb_u32(__addr, __offset) __arm_vldrwq_gather_base_wb_u32(__addr, __offset) +#define vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) +#define vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) 
+#define vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) +#define vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) +#define vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) +#define vstrdq_scatter_base_wb_s64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_s64(__addr, __offset, __value) +#define vstrdq_scatter_base_wb_u64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_u64(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_s32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_s32(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_u32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_u32(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_f32(__addr, __offset, __value) #endif __extension__ extern __inline void @@ -13388,6 +13408,150 @@ __arm_viwdupq_wb_u16 (uint32_t * __a, uint32_t __b, const int __imm) return __res; } +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) +{ + int64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_sv2di (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+__arm_vldrdq_gather_base_wb_u64 (uint64x2_t * __addr, const int __offset) +{ + uint64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_uv2di (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_z_s64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) +{ + int64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_z_sv2di (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_z_u64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) +{ + uint64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_z_uv2di (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_s32 (uint32x4_t * __addr, const int __offset) +{ + int32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_sv4si (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_u32 (uint32x4_t * __addr, const int __offset) +{ + uint32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_uv4si (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_s32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + int32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_sv4si (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+__arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + uint32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_uv4si (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value) +{ + __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value); + __builtin_mve_vstrdq_scatter_base_wb_add_sv2di (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value) +{ + __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value); + __builtin_mve_vstrdq_scatter_base_wb_add_uv2di (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p); + __builtin_mve_vstrdq_scatter_base_wb_p_add_sv2di (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p); + __builtin_mve_vstrdq_scatter_base_wb_p_add_uv2di (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p) +{ + 
__builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_sv4si (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_uv4si (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value); + __builtin_mve_vstrwq_scatter_base_wb_add_sv4si (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value); + __builtin_mve_vstrwq_scatter_base_wb_add_uv4si (*__addr, __offset, *__addr); +} + #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. 
*/ __extension__ extern __inline void @@ -16024,6 +16188,42 @@ __arm_vreinterpretq_f32_u8 (uint8x16_t __a) return (float32x4_t) __a; } +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) +{ + float32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_fv4sf (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + float32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_fv4sf (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value); + __builtin_mve_vstrwq_scatter_base_wb_add_fv4sf (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_fv4sf (*__addr, __offset, *__addr, __p); +} + #endif enum { @@ -18940,8 +19140,34 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint64x2_t]: __arm_vreinterpretq_u8_u64 (__ARM_mve_coerce(__p0, uint64x2_t)), \ int (*)[__ARM_mve_type_float32x4_t]: __arm_vreinterpretq_u8_f32 (__ARM_mve_coerce(__p0, float32x4_t)));}) +#define vstrwq_scatter_base_wb(p0,p1,p2) __arm_vstrwq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); 
\ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t)));}) + +#define vstrwq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));}) + #else /* MVE Integer. 
*/ +#define vstrwq_scatter_base_wb(p0,p1,p2) __arm_vstrwq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)));}) + +#define vstrwq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + #define vst4q(p0,p1) __arm_vst4q(p0,p1) #define __arm_vst4q(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -21424,6 +21650,18 @@ extern void *__ARM_undef; #endif /* MVE Integer. 
*/ +#define vstrdq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \ + int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));}) + +#define vstrdq_scatter_base_wb(p0,p1,p2) __arm_vstrdq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrdq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \ + int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));}) + #define vldrdq_gather_offset(p0,p1) __arm_vldrdq_gather_offset(p0,p1) #define __arm_vldrdq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 2ed7886..9fc0a8a 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -827,3 +827,33 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vddupq_m_n_u, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_u, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_s, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_f, v4sf) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_u, v2di) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_s, v2di) +VAR1 
(STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_u, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_s, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_f, v4sf) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_u, v2di) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_s, v2di) +VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si) +VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf) +VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di) +VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_s, v4si) +VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_f, v4sf) +VAR1 (STRSBWBS_P, vstrdq_scatter_base_wb_p_s, v2di) +VAR1 (LDRGBWBU_Z, vldrwq_gather_base_wb_z_u, v4si) +VAR1 (LDRGBWBU_Z, vldrdq_gather_base_wb_z_u, v2di) +VAR1 (LDRGBWBU, vldrwq_gather_base_wb_u, v4si) +VAR1 (LDRGBWBU, vldrdq_gather_base_wb_u, v2di) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_s, v4si) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_f, v4sf) +VAR1 (LDRGBWBS_Z, vldrdq_gather_base_wb_z_s, v2di) +VAR1 (LDRGBWBS, vldrwq_gather_base_wb_s, v4si) +VAR1 (LDRGBWBS, vldrwq_gather_base_wb_f, v4sf) +VAR1 (LDRGBWBS, vldrdq_gather_base_wb_s, v2di) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index b2702f5..a22e752 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -208,7 +208,10 @@ VSTRDQSSO_U VSTRWQSO_S VSTRWQSO_U VSTRWQSSO_S VSTRWQSSO_U VSTRHQSO_F VSTRHQSSO_F VSTRWQSB_F VSTRWQSO_F VSTRWQSSO_F VDDUPQ VDDUPQ_M VDWDUPQ - VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M]) + VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M + VSTRWQSBWB_S VSTRWQSBWB_U VLDRWQGBWB_S VLDRWQGBWB_U + VSTRWQSBWB_F VLDRWQGBWB_F VSTRDQSBWB_S VSTRDQSBWB_U + VLDRDQGBWB_S VLDRDQGBWB_U]) (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") (V4SF "V4SI")]) @@ -377,7 +380,10 @@ (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s") (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u") - 
(VSTRWQSSO_S "s")]) + (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") + (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") + (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") + (VSTRDQSBWB_U "u")]) (define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") @@ -626,6 +632,10 @@ (define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) (define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U]) (define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) +(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) +(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) +(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) +(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) (define_insn "*mve_mov" [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Us") @@ -10042,3 +10052,572 @@ "vpst\;\tviwdupt.u%#\t%q2, %3, %4, %5" [(set_attr "type" "mve_move") (set_attr "length""8")]) +(define_expand "mve_vstrwq_scatter_base_wb_v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "w") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_v4si_insn (ignore_wb, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_add_v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_v4si_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u] +;; +(define_insn "mve_vstrwq_scatter_base_wb_v4si_insn" + [(set (mem:BLK (scratch))
+ (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 3 "s_register_operand" "w")] + VSTRWSBWBQ)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrwq_scatter_base_wb_p_v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_v4si_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_p_add_v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_v4si_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u] +;; +(define_insn "mve_vstrwq_scatter_base_wb_p_v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRWSBWBQ)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] 
+ VSTRWSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vstrwq_scatter_base_wb_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 2 "s_register_operand" "w") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (ignore_wb,operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_add_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_vec = gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_f] +;; +(define_insn "mve_vstrwq_scatter_base_wb_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 3 "s_register_operand" "w")] + VSTRWQSBWB_F)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWQSBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrwq_scatter_base_wb_p_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + 
(match_operand:V4SF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_p_add_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_vec = gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_p_f] +;; +(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRWQSBWB_F)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWQSBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vstrdq_scatter_base_wb_v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "w") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_v2di_insn 
(ignore_wb, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrdq_scatter_base_wb_add_v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "0") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_v2di_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u] +;; +(define_insn "mve_vstrdq_scatter_base_wb_v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V2DI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 3 "s_register_operand" "w")] + VSTRDSBWBQ)) + (set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 1) (match_dup 2)] + VSTRDSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrdq_scatter_base_wb_p_v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_p_v2di_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrdq_scatter_base_wb_p_add_v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx 
ignore_vec = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_p_v2di_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u] +;; +(define_insn "mve_vstrdq_scatter_base_wb_p_v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V2DI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRDSBWBQ)) + (set (match_operand:V2DI 0 "s_register_operand" "=w") + (unspec:V2DI [(match_dup 1) (match_dup 2)] + VSTRDSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrwq_gather_base_wb_v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_v4si_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_s vldrwq_gather_base_wb_u] +;; +(define_insn "mve_vldrwq_gather_base_wb_v4si_insn" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRWGBWBQ)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand 
"mve_vldrwq_gather_base_wb_z_v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_v4si_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_z_s vldrwq_gather_base_wb_z_u] +;; +(define_insn "mve_vldrwq_gather_base_wb_z_v4si_insn" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRWGBWBQ)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrwq_gather_base_wb_fv4sf" + [(match_operand:V4SF 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_fv4sf_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_f] +;; +(define_insn "mve_vldrwq_gather_base_wb_fv4sf_insn" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRWQGBWB_F)) + (set (match_operand:V4SI 1 "s_register_operand" 
"=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWQGBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrwq_gather_base_wb_z_fv4sf" + [(match_operand:V4SF 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_z_f] +;; +(define_insn "mve_vldrwq_gather_base_wb_z_fv4sf_insn" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRWQGBWB_F)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWQGBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrdq_gather_base_wb_v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_v2di_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; 
[vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] +;; +(define_insn "mve_vldrdq_gather_base_wb_v2di_insn" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRDGBWBQ)) + (set (match_operand:V2DI 1 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 2) (match_dup 3)] + VLDRDGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrd.64\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrdq_gather_base_wb_z_v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_z_v2di_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u] +;; +(define_insn "mve_vldrdq_gather_base_wb_z_v2di_insn" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRDGBWBQ)) + (set (match_operand:V2DI 1 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 2) (match_dup 3)] + VLDRDGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrdt.u64\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4506849..11b7afb 100644 --- a/gcc/testsuite/ChangeLog +++ 
b/gcc/testsuite/ChangeLog @@ -2,6 +2,36 @@ Andre Vieira Mihail Ionescu + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: New test. + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c: + Likewise. + * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c: + Likewise. + * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c: + Likewise. + * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c: + Likewise. + * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c: + Likewise. + * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c: Likewise. + +2020-03-20 Srinath Parvathaneni + Andre Vieira + Mihail Ionescu + * gcc.target/arm/mve/intrinsics/vddupq_m_n_u16.c: New test. * gcc.target/arm/mve/intrinsics/vddupq_m_n_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vddupq_m_n_u8.c: Likewise. 
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c new file mode 100644 index 0000000..763a72e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int64x2_t +foo (uint64x2_t * addr) +{ + return vldrdq_gather_base_wb_s64 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c new file mode 100644 index 0000000..df719f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64x2_t * addr) +{ + return vldrdq_gather_base_wb_u64 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c new file mode 100644 index 0000000..c22adfc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ +#include "arm_mve.h" + +int64x2_t foo (uint64x2_t * addr, mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_s64 (addr, 1016, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c new file mode 100644 index 0000000..385c0d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ +#include "arm_mve.h" + +uint64x2_t foo (uint64x2_t * addr, mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_u64 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c new file mode 100644 index 0000000..12473c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_f32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c new file mode 100644 index 0000000..619e41a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_s32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c new file mode 100644 index 0000000..144e7f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_u32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c new file mode 100644 index 0000000..d69f9bd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_f32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c new file mode 100644 index 0000000..620dec6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_s32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c new file mode 100644 index 0000000..409ecf4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_u32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c new file mode 100644 index 0000000..9fc4e34 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, int64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p_s64 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, int64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c new file mode 100644 index 0000000..0434f6d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { 
dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, uint64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p_u64 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, uint64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c new file mode 100644 index 0000000..9989564 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, int64x2_t value) +{ + vstrdq_scatter_base_wb_s64 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, int64x2_t value) +{ + vstrdq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c new file mode 100644 index 0000000..60c71d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, uint64x2_t value) +{ + vstrdq_scatter_base_wb_u64 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ + +void +foo1 (uint64x2_t * addr, 
const int offset, uint64x2_t value) +{ + vstrdq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c new file mode 100644 index 0000000..2bae380 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, float32x4_t value) +{ + vstrwq_scatter_base_wb_f32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, float32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c new file mode 100644 index 0000000..dee9413 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, float32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_f32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, float32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c new file mode 100644 index 0000000..3a0423a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_s32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c new file mode 100644 index 0000000..32eb757 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_u32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c new file mode 100644 index 0000000..4c232e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c 
@@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, int32x4_t value) +{ + vstrwq_scatter_base_wb_s32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, int32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c new file mode 100644 index 0000000..7171a9f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, uint32x4_t value) +{ + vstrwq_scatter_base_wb_u32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, uint32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ -- 2.7.4