From e3e913d7a0ab9502c14ffa275485b34d0bd9c754 Mon Sep 17 00:00:00 2001
From: yroux
Date: Wed, 16 Jul 2014 16:03:58 +0000
Subject: [PATCH] gcc/

2014-07-16  Yvan Roux

	Backport from trunk r211058, 211177.
	2014-05-29  Alan Lawrence

	* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
	TYPES_BINOPV): New static data.
	* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
	* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
	New patterns.
	* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
	patterns for EXT.
	(aarch64_evpc_ext): New function.
	* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
	* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
	vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
	vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
	vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
	vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.

	2014-06-03  Alan Lawrence

	* config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle
	location == 0.

gcc/testsuite/

2014-07-16  Yvan Roux

	Backport from trunk r210152, 211059.
	2014-05-29  Alan Lawrence

	* gcc.target/arm/simd/vextQf32_1.c: New file.
	* gcc.target/arm/simd/vextQp16_1.c: New file.
	* gcc.target/arm/simd/vextQp8_1.c: New file.
	* gcc.target/arm/simd/vextQs16_1.c: New file.
	* gcc.target/arm/simd/vextQs32_1.c: New file.
	* gcc.target/arm/simd/vextQs64_1.c: New file.
	* gcc.target/arm/simd/vextQs8_1.c: New file.
	* gcc.target/arm/simd/vextQu16_1.c: New file.
	* gcc.target/arm/simd/vextQu32_1.c: New file.
	* gcc.target/arm/simd/vextQu64_1.c: New file.
	* gcc.target/arm/simd/vextQu8_1.c: New file.
	* gcc.target/arm/simd/vextQp64_1.c: New file.
	* gcc.target/arm/simd/vextf32_1.c: New file.
	* gcc.target/arm/simd/vextp16_1.c: New file.
	* gcc.target/arm/simd/vextp8_1.c: New file.
	* gcc.target/arm/simd/vexts16_1.c: New file.
	* gcc.target/arm/simd/vexts32_1.c: New file.
	* gcc.target/arm/simd/vexts64_1.c: New file.
	* gcc.target/arm/simd/vexts8_1.c: New file.
	* gcc.target/arm/simd/vextu16_1.c: New file.
	* gcc.target/arm/simd/vextu32_1.c: New file.
	* gcc.target/arm/simd/vextu64_1.c: New file.
	* gcc.target/arm/simd/vextu8_1.c: New file.
	* gcc.target/arm/simd/vextp64_1.c: New file.

	2014-05-07  Alan Lawrence

	gcc.target/aarch64/simd/ext_f32.x: New file.
	gcc.target/aarch64/simd/ext_f32_1.c: New file.
	gcc.target/aarch64/simd/ext_p16.x: New file.
	gcc.target/aarch64/simd/ext_p16_1.c: New file.
	gcc.target/aarch64/simd/ext_p8.x: New file.
	gcc.target/aarch64/simd/ext_p8_1.c: New file.
	gcc.target/aarch64/simd/ext_s16.x: New file.
	gcc.target/aarch64/simd/ext_s16_1.c: New file.
	gcc.target/aarch64/simd/ext_s32.x: New file.
	gcc.target/aarch64/simd/ext_s32_1.c: New file.
	gcc.target/aarch64/simd/ext_s64.x: New file.
	gcc.target/aarch64/simd/ext_s64_1.c: New file.
	gcc.target/aarch64/simd/ext_s8.x: New file.
	gcc.target/aarch64/simd/ext_s8_1.c: New file.
	gcc.target/aarch64/simd/ext_u16.x: New file.
	gcc.target/aarch64/simd/ext_u16_1.c: New file.
	gcc.target/aarch64/simd/ext_u32.x: New file.
	gcc.target/aarch64/simd/ext_u32_1.c: New file.
	gcc.target/aarch64/simd/ext_u64.x: New file.
	gcc.target/aarch64/simd/ext_u64_1.c: New file.
	gcc.target/aarch64/simd/ext_u8.x: New file.
	gcc.target/aarch64/simd/ext_u8_1.c: New file.
	gcc.target/aarch64/simd/ext_f64.c: New file.
	gcc.target/aarch64/simd/extq_f32.x: New file.
	gcc.target/aarch64/simd/extq_f32_1.c: New file.
	gcc.target/aarch64/simd/extq_p16.x: New file.
gcc.target/aarch64/simd/extq_p16_1.c: New file. gcc.target/aarch64/simd/extq_p8.x: New file. gcc.target/aarch64/simd/extq_p8_1.c: New file. gcc.target/aarch64/simd/extq_s16.x: New file. gcc.target/aarch64/simd/extq_s16_1.c: New file. gcc.target/aarch64/simd/extq_s32.x: New file. gcc.target/aarch64/simd/extq_s32_1.c: New file. gcc.target/aarch64/simd/extq_s64.x: New file. gcc.target/aarch64/simd/extq_s64_1.c: New file. gcc.target/aarch64/simd/extq_s8.x: New file. gcc.target/aarch64/simd/extq_s8_1.c: New file. gcc.target/aarch64/simd/extq_u16.x: New file. gcc.target/aarch64/simd/extq_u16_1.c: New file. gcc.target/aarch64/simd/extq_u32.x: New file. git-svn-id: svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@212677 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.linaro | 27 + gcc/config/aarch64/aarch64-builtins.c | 4 + gcc/config/aarch64/aarch64-simd-builtins.def | 3 + gcc/config/aarch64/aarch64-simd.md | 29 + gcc/config/aarch64/aarch64.c | 68 ++- gcc/config/aarch64/arm_neon.h | 598 ++++++++++----------- gcc/config/aarch64/iterators.md | 1 + gcc/testsuite/ChangeLog.linaro | 73 +++ gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x | 30 ++ gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c | 25 + gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x | 58 ++ gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x | 114 ++++ gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x | 58 ++ gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x | 30 ++ gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x | 17 + gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c | 11 + gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x | 114 ++++ gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x | 58 ++ gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x | 30 ++ gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x | 17 + gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c | 11 + gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x | 114 ++++ gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x | 58 ++ gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c | 36 ++ gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x | 114 ++++ gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x | 227 ++++++++ gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x | 114 ++++ gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x | 58 ++ gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x | 30 ++ gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x | 227 ++++++++ gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x | 114 ++++ gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x | 58 ++ gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c | 10 + 
gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x | 30 ++ gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x | 227 ++++++++ gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c | 10 + gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c | 33 ++ gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextf32_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextp16_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextp64_1.c | 26 + gcc/testsuite/gcc.target/arm/simd/vextp8_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vexts16_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vexts32_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vexts64_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vexts8_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextu16_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextu32_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextu64_1.c | 12 + gcc/testsuite/gcc.target/arm/simd/vextu8_1.c | 12 + 78 files changed, 2993 insertions(+), 313 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x create mode 
100644 gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextf32_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextp16_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextp64_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextp8_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vexts16_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vexts32_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vexts64_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vexts8_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextu16_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextu32_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextu64_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vextu8_1.c diff --git a/gcc/ChangeLog.linaro b/gcc/ChangeLog.linaro index d4dcabf..0a1d2a6 100644 --- a/gcc/ChangeLog.linaro +++ b/gcc/ChangeLog.linaro @@ -1,5 +1,32 @@ 2014-07-16 Yvan Roux + Backport from trunk r211058, 211177. + 2014-05-29 Alan Lawrence + + * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers, + TYPES_BINOPV): New static data. + * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin. + * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi): + New patterns. + * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match + patterns for EXT. + (aarch64_evpc_ext): New function. + + * config/aarch64/iterators.md (UNSPEC_EXT): New enum element. 
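Two pieces of context help when reading the arm_neon.h entry just below: GCC's __builtin_shuffle selects lanes from the concatenation of its two vector arguments, and an out-of-range mask element wraps silently, which is why the patch adds the im_lane_bound meta-builtin to diagnose bad immediates instead. As a minimal standalone sketch (not part of the patch; plain GCC vector extensions on any target, little-endian lane numbering assumed), vext_s32 (a, b, 1) corresponds to:

#include <stdio.h>

/* A GCC generic vector type: two ints, 8 bytes wide, like int32x2_t.  */
typedef int v2si __attribute__ ((vector_size (8)));

int
main (void)
{
  v2si a = {0, 1};
  v2si b = {2, 3};
  /* Mask {1, 2} indexes the concatenation a ++ b, so this picks
     a[1] then b[0], exactly what vext_s32 (a, b, 1) returns on
     a little-endian target.  */
  v2si r = __builtin_shuffle (a, b, (v2si) {1, 2});
  printf ("%d %d\n", r[0], r[1]);   /* prints "1 2" */
  return 0;
}
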
+
+	* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
+	vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
+	vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
+	vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
+	vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.
+
+	2014-06-03  Alan Lawrence
+
+	* config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle
+	location == 0.
+
 2014-07-16  Yvan Roux
 
 	Backport from trunk r209797.
 
 	2014-04-25  Kyrylo Tkachov

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index afd568e..ca14d51 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -169,6 +169,10 @@ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
 #define TYPES_BINOP (aarch64_types_binop_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_void, qualifier_none, qualifier_none };
+#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 339e8f8..b5d9965 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -410,3 +410,6 @@
   /* Implemented by aarch64_crypto_pmull.  */
   VAR1 (BINOPP, crypto_pmull, 0, di)
   VAR1 (BINOPP, crypto_pmull, 0, v2di)
+
+  /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin.  */
+  VAR1 (BINOPV, im_lane_bound, 0, si)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dde2311..ff8b5f2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4508,6 +4508,35 @@
   [(set_attr "type" "neon_permute")]
 )
 
+;; Note immediate (third) operand is lane index not byte index.
+(define_insn "aarch64_ext<mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+        (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
+                      (match_operand:VALL 2 "register_operand" "w")
+                      (match_operand:SI 3 "immediate_operand" "i")]
+                     UNSPEC_EXT))]
+  "TARGET_SIMD"
+{
+  operands[3] = GEN_INT (INTVAL (operands[3])
+      * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
+  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
+}
+  [(set_attr "type" "neon_ext<q>")]
+)
+
+;; This exists solely to check the arguments to the corresponding __builtin.
+;; Used where we want an error for out-of-range indices which would otherwise
+;; be silently wrapped (e.g. the mask to a __builtin_shuffle).
+(define_expand "aarch64_im_lane_boundsi"
+  [(match_operand:SI 0 "immediate_operand" "i")
+   (match_operand:SI 1 "immediate_operand" "i")]
+  "TARGET_SIMD"
+{
+  aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1]));
+  DONE;
+}
+)
+
 (define_insn "aarch64_st2<mode>_dreg"
   [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:TI [(match_operand:OI 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8936560..6168e01 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9018,6 +9018,70 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Recognize patterns for the EXT insn.
*/ + +static bool +aarch64_evpc_ext (struct expand_vec_perm_d *d) +{ + unsigned int i, nelt = d->nelt; + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx offset; + + unsigned int location = d->perm[0]; /* Always < nelt. */ + + /* Check if the extracted indices are increasing by one. */ + for (i = 1; i < nelt; i++) + { + unsigned int required = location + i; + if (d->one_vector_p) + { + /* We'll pass the same vector in twice, so allow indices to wrap. */ + required &= (nelt - 1); + } + if (d->perm[i] != required) + return false; + } + + switch (d->vmode) + { + case V16QImode: gen = gen_aarch64_extv16qi; break; + case V8QImode: gen = gen_aarch64_extv8qi; break; + case V4HImode: gen = gen_aarch64_extv4hi; break; + case V8HImode: gen = gen_aarch64_extv8hi; break; + case V2SImode: gen = gen_aarch64_extv2si; break; + case V4SImode: gen = gen_aarch64_extv4si; break; + case V2SFmode: gen = gen_aarch64_extv2sf; break; + case V4SFmode: gen = gen_aarch64_extv4sf; break; + case V2DImode: gen = gen_aarch64_extv2di; break; + case V2DFmode: gen = gen_aarch64_extv2df; break; + default: + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + /* The case where (location == 0) is a no-op for both big- and little-endian, + and is removed by the mid-end at optimization levels -O1 and higher. */ + + if (BYTES_BIG_ENDIAN && (location != 0)) + { + /* After setup, we want the high elements of the first vector (stored + at the LSB end of the register), and the low elements of the second + vector (stored at the MSB end of the register). So swap. */ + rtx temp = d->op0; + d->op0 = d->op1; + d->op1 = temp; + /* location != 0 (above), so safe to assume (nelt - location) < nelt. */ + location = nelt - location; + } + + offset = GEN_INT (location); + emit_insn (gen (d->target, d->op0, d->op1, offset)); + return true; +} + static bool aarch64_evpc_dup (struct expand_vec_perm_d *d) { @@ -9121,7 +9185,9 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (TARGET_SIMD) { - if (aarch64_evpc_zip (d)) + if (aarch64_evpc_ext (d)) + return true; + else if (aarch64_evpc_zip (d)) return true; else if (aarch64_evpc_uzp (d)) return true; diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e1b5d62..c1589b8 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -5661,318 +5661,6 @@ vcvtxd_f32_f64 (float64_t a) return result; } -#define vext_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x2_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x1_t b_ = (b); \ - float64x1_t a_ = (a); \ - float64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x8_t b_ = (b); \ - poly8x8_t a_ = (a); \ - poly8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x4_t b_ = (b); \ - poly16x4_t a_ = (a); \ - poly16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s8(a, b, c) \ - __extension__ \ - ({ \ - int8x8_t b_ = (b); \ - 
int8x8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x1_t b_ = (b); \ - int64x1_t a_ = (a); \ - int64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x1_t b_ = (b); \ - uint64x1_t a_ = (a); \ - uint64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x16_t b_ = (b); \ - poly8x16_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x8_t b_ = (b); \ - poly16x8_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s8(a, b, c) \ - __extension__ \ - ({ \ - int8x16_t b_ = (b); \ - int8x16_t a_ = (a); \ - int8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, 
%2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x16_t b_ = (b); \ - uint8x16_t a_ = (a); \ - uint8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) { @@ -17906,6 +17594,292 @@ vdupd_laneq_u64 (uint64x2_t __a, const int __b) return __aarch64_vgetq_lane_u64 (__a, __b); } +/* vext */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) +{ + /* The only possible index to the assembler instruction returns element 0. 
*/ + __builtin_aarch64_im_lane_boundsi (__c, 1); + return __a; +} +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) +{ + /* The only possible index to the assembler instruction returns element 0. 
*/ + __builtin_aarch64_im_lane_boundsi (__c, 1); + return __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) +{ + /* The only possible index to the assembler instruction returns element 0. */ + __builtin_aarch64_im_lane_boundsi (__c, 1); + return __a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 16); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 
16); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 16); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + /* vfma_lane */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) diff --git 
a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d7ac267..9c48745 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -270,6 +270,7 @@ UNSPEC_UZP2 ; Used in vector permute patterns. UNSPEC_TRN1 ; Used in vector permute patterns. UNSPEC_TRN2 ; Used in vector permute patterns. + UNSPEC_EXT ; Used in aarch64-simd.md. UNSPEC_AESE ; Used in aarch64-simd.md. UNSPEC_AESD ; Used in aarch64-simd.md. UNSPEC_AESMC ; Used in aarch64-simd.md. diff --git a/gcc/testsuite/ChangeLog.linaro b/gcc/testsuite/ChangeLog.linaro index c0e26bd..004194a 100644 --- a/gcc/testsuite/ChangeLog.linaro +++ b/gcc/testsuite/ChangeLog.linaro @@ -1,5 +1,78 @@ 2014-07-16 Yvan Roux + Backport from trunk r210152, 211059. + 2014-05-29 Alan Lawrence + + * gcc.target/arm/simd/vextQf32_1.c: New file. + * gcc.target/arm/simd/vextQp16_1.c: New file. + * gcc.target/arm/simd/vextQp8_1.c: New file. + * gcc.target/arm/simd/vextQs16_1.c: New file. + * gcc.target/arm/simd/vextQs32_1.c: New file. + * gcc.target/arm/simd/vextQs64_1.c: New file. + * gcc.target/arm/simd/vextQs8_1.c: New file. + * gcc.target/arm/simd/vextQu16_1.c: New file. + * gcc.target/arm/simd/vextQu32_1.c: New file. + * gcc.target/arm/simd/vextQu64_1.c: New file. + * gcc.target/arm/simd/vextQu8_1.c: New file. + * gcc.target/arm/simd/vextQp64_1.c: New file. + * gcc.target/arm/simd/vextf32_1.c: New file. + * gcc.target/arm/simd/vextp16_1.c: New file. + * gcc.target/arm/simd/vextp8_1.c: New file. + * gcc.target/arm/simd/vexts16_1.c: New file. + * gcc.target/arm/simd/vexts32_1.c: New file. + * gcc.target/arm/simd/vexts64_1.c: New file. + * gcc.target/arm/simd/vexts8_1.c: New file. + * gcc.target/arm/simd/vextu16_1.c: New file. + * gcc.target/arm/simd/vextu32_1.c: New file. + * gcc.target/arm/simd/vextu64_1.c: New file. + * gcc.target/arm/simd/vextu8_1.c: New file. + * gcc.target/arm/simd/vextp64_1.c: New file. + + 2014-05-07 Alan Lawrence + + * gcc.target/aarch64/simd/ext_f32.x: New file. + * gcc.target/aarch64/simd/ext_f32_1.c: New file. + * gcc.target/aarch64/simd/ext_p16.x: New file. + * gcc.target/aarch64/simd/ext_p16_1.c: New file. + * gcc.target/aarch64/simd/ext_p8.x: New file. + * gcc.target/aarch64/simd/ext_p8_1.c: New file. + * gcc.target/aarch64/simd/ext_s16.x: New file. + * gcc.target/aarch64/simd/ext_s16_1.c: New file. + * gcc.target/aarch64/simd/ext_s32.x: New file. + * gcc.target/aarch64/simd/ext_s32_1.c: New file. + * gcc.target/aarch64/simd/ext_s64.x: New file. + * gcc.target/aarch64/simd/ext_s64_1.c: New file. + * gcc.target/aarch64/simd/ext_s8.x: New file. + * gcc.target/aarch64/simd/ext_s8_1.c: New file. + * gcc.target/aarch64/simd/ext_u16.x: New file. + * gcc.target/aarch64/simd/ext_u16_1.c: New file. + * gcc.target/aarch64/simd/ext_u32.x: New file. + * gcc.target/aarch64/simd/ext_u32_1.c: New file. + * gcc.target/aarch64/simd/ext_u64.x: New file. + * gcc.target/aarch64/simd/ext_u64_1.c: New file. + * gcc.target/aarch64/simd/ext_u8.x: New file. + * gcc.target/aarch64/simd/ext_u8_1.c: New file. + * gcc.target/aarch64/simd/ext_f64.c: New file. + * gcc.target/aarch64/simd/extq_f32.x: New file. + * gcc.target/aarch64/simd/extq_f32_1.c: New file. + * gcc.target/aarch64/simd/extq_p16.x: New file. + * gcc.target/aarch64/simd/extq_p16_1.c: New file. + * gcc.target/aarch64/simd/extq_p8.x: New file. + * gcc.target/aarch64/simd/extq_p8_1.c: New file. + * gcc.target/aarch64/simd/extq_s16.x: New file. + * gcc.target/aarch64/simd/extq_s16_1.c: New file. + * gcc.target/aarch64/simd/extq_s32.x: New file. 
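An aside on the shape of the tests being listed here: every .x file follows one template, visible further down in the patch, with one wrapper function per legal immediate and a main that checks the result lane by lane, calling abort () on any mismatch. A condensed sketch of that pattern (an illustration, not one of the added files; assumes an AArch64 GCC so that arm_neon.h provides vld1_s16 and vext_s16):

#include <arm_neon.h>

extern void abort (void);

int
main (void)
{
  int i;
  int16_t lanes[] = {0, 1, 2, 3, 4, 5, 6, 7};
  int16x4_t a = vld1_s16 (lanes);        /* {0, 1, 2, 3} */
  int16x4_t b = vld1_s16 (lanes + 4);    /* {4, 5, 6, 7} */
  int16x4_t r = vext_s16 (a, b, 2);      /* expect {2, 3, 4, 5} */

  /* The result is a window of four consecutive lanes of a ++ b
     starting at the immediate, so compare against lanes[i + 2].  */
  for (i = 0; i < 4; i++)
    if (r[i] != lanes[i + 2])
      abort ();
  return 0;
}
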
+	* gcc.target/aarch64/simd/extq_s32_1.c: New file.
+	* gcc.target/aarch64/simd/extq_s64.x: New file.
+	* gcc.target/aarch64/simd/extq_s64_1.c: New file.
+	* gcc.target/aarch64/simd/extq_s8.x: New file.
+	* gcc.target/aarch64/simd/extq_s8_1.c: New file.
+	* gcc.target/aarch64/simd/extq_u16.x: New file.
+	* gcc.target/aarch64/simd/extq_u16_1.c: New file.
+	* gcc.target/aarch64/simd/extq_u32.x: New file.
+
+2014-07-16  Yvan Roux
+
 	Backport from trunk r209940, r209943, r209947.
 
 	2014-04-30  Alan Lawrence

diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
new file mode 100644
index 0000000..b511e51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
@@ -0,0 +1,30 @@
+extern void abort (void);
+
+float32x2_t
+test_vext_f32_1 (float32x2_t a, float32x2_t b)
+{
+  return vext_f32 (a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  float32_t arr1[] = {0, 1};
+  float32x2_t in1 = vld1_f32 (arr1);
+  float32_t arr2[] = {2, 3};
+  float32x2_t in2 = vld1_f32 (arr2);
+  float32_t exp[2];
+  float32x2_t expected;
+  float32x2_t actual = test_vext_f32_1 (in1, in2);
+
+  for (i = 0; i < 2; i++)
+    exp[i] = i + 1;
+  expected = vld1_f32 (exp);
+  for (i = 0; i < 2; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
new file mode 100644
index 0000000..806c10a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextf32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_f32.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
new file mode 100644
index 0000000..09aecba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
@@ -0,0 +1,25 @@
+/* Test the `vextf64' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  float64x1_t in1 = {0};
+  float64x1_t in2 = {1};
+  float64x1_t actual = vext_f64 (in1, in2, 0);
+  if (actual != in1)
+    abort ();
+
+  return 0;
+}
+
+/* Do not scan-assembler.  An EXT instruction could be emitted, but would merely
+   return its first argument, so it is legitimate to optimize it out.
*/ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x new file mode 100644 index 0000000..b5b3814 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x @@ -0,0 +1,58 @@ +extern void abort (void); + +poly16x4_t +test_vext_p16_1 (poly16x4_t a, poly16x4_t b) +{ + return vext_p16 (a, b, 1); +} + +poly16x4_t +test_vext_p16_2 (poly16x4_t a, poly16x4_t b) +{ + return vext_p16 (a, b, 2); +} + +poly16x4_t +test_vext_p16_3 (poly16x4_t a, poly16x4_t b) +{ + return vext_p16 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly16_t arr1[] = {0, 1, 2, 3}; + poly16x4_t in1 = vld1_p16 (arr1); + poly16_t arr2[] = {4, 5, 6, 7}; + poly16x4_t in2 = vld1_p16 (arr2); + poly16_t exp[4]; + poly16x4_t expected; + poly16x4_t actual = test_vext_p16_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1_p16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p16_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1_p16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p16_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1_p16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c new file mode 100644 index 0000000..86b8660 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextp16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_p16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x new file mode 100644 index 0000000..95b37b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x @@ -0,0 +1,114 @@ +extern void abort (void); + +poly8x8_t +test_vext_p8_1 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 1); +} + +poly8x8_t +test_vext_p8_2 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 2); +} + +poly8x8_t +test_vext_p8_3 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 3); +} + +poly8x8_t +test_vext_p8_4 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 4); +} + +poly8x8_t +test_vext_p8_5 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 5); +} + +poly8x8_t +test_vext_p8_6 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 6); +} + +poly8x8_t +test_vext_p8_7 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + poly8x8_t in1 = vld1_p8 (arr1); + poly8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + poly8x8_t in2 = vld1_p8 (arr2); + poly8_t exp[8]; + poly8x8_t expected; + poly8x8_t actual = test_vext_p8_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) 
+ if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c new file mode 100644 index 0000000..aae0575 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextp8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_p8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x new file mode 100644 index 0000000..78ef0ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x @@ -0,0 +1,58 @@ +extern void abort (void); + +int16x4_t +test_vext_s16_1 (int16x4_t a, int16x4_t b) +{ + return vext_s16 (a, b, 1); +} + +int16x4_t +test_vext_s16_2 (int16x4_t a, int16x4_t b) +{ + return vext_s16 (a, b, 2); +} + +int16x4_t +test_vext_s16_3 (int16x4_t a, int16x4_t b) +{ + return vext_s16 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + int16_t arr1[] = {0, 1, 2, 3}; + int16x4_t in1 = vld1_s16 (arr1); + int16_t arr2[] = {4, 5, 6, 7}; + int16x4_t in2 = vld1_s16 (arr2); + int16_t exp[4]; + int16x4_t expected; + int16x4_t actual = test_vext_s16_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1_s16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s16_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1_s16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s16_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1_s16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c new file mode 100644 index 0000000..fd9ebd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c @@ -0,0 +1,10 @@ +/* Test the `vexts16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x new file mode 100644 index 0000000..066ceac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x @@ -0,0 +1,30 @@ +extern void abort (void); + +int32x2_t +test_vext_s32_1 (int32x2_t a, int32x2_t b) +{ + return vext_s32 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + int32_t arr1[] = {0, 1}; + int32x2_t in1 = vld1_s32 (arr1); + int32_t arr2[] = {2, 3}; + int32x2_t in2 = vld1_s32 (arr2); + int32_t exp[2]; + int32x2_t expected; + int32x2_t actual = test_vext_s32_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 1; + expected = vld1_s32 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c new file mode 100644 index 0000000..05c5d60 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c @@ -0,0 +1,10 @@ +/* Test the `vexts32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x new file mode 100644 index 0000000..c71011a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x @@ -0,0 +1,17 @@ +extern void abort (void); + +int +main (int argc, char **argv) +{ + int i, off; + int64_t arr1[] = {0}; + int64x1_t in1 = vld1_s64 (arr1); + int64_t arr2[] = {1}; + int64x1_t in2 = vld1_s64 (arr2); + int64x1_t actual = vext_s64 (in1, in2, 0); + if (actual != in1) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c new file mode 100644 index 0000000..1e2748d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c @@ -0,0 +1,11 @@ +/* Test the `vexts64' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s64.x" + +/* Do not scan-assembler. An EXT instruction could be emitted, but would merely + return its first argument, so it is legitimate to optimize it out. 
*/ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x new file mode 100644 index 0000000..e79ef0a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x @@ -0,0 +1,114 @@ +extern void abort (void); + +int8x8_t +test_vext_s8_1 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 1); +} + +int8x8_t +test_vext_s8_2 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 2); +} + +int8x8_t +test_vext_s8_3 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 3); +} + +int8x8_t +test_vext_s8_4 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 4); +} + +int8x8_t +test_vext_s8_5 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 5); +} + +int8x8_t +test_vext_s8_6 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 6); +} + +int8x8_t +test_vext_s8_7 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + int8x8_t in1 = vld1_s8 (arr1); + int8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + int8x8_t in2 = vld1_s8 (arr2); + int8_t exp[8]; + int8x8_t expected; + int8x8_t actual = test_vext_s8_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c new file mode 100644 index 0000000..c368eba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c @@ -0,0 +1,10 @@ +/* Test the `vexts8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x new file mode 100644 index 0000000..5a16ef3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x @@ -0,0 +1,58 @@ +extern void abort (void); + +uint16x4_t +test_vext_u16_1 (uint16x4_t a, uint16x4_t b) +{ + return vext_u16 (a, b, 1); +} + +uint16x4_t +test_vext_u16_2 (uint16x4_t a, uint16x4_t b) +{ + return vext_u16 (a, b, 2); +} + +uint16x4_t +test_vext_u16_3 (uint16x4_t a, uint16x4_t b) +{ + return vext_u16 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint16_t arr1[] = {0, 1, 2, 3}; + uint16x4_t in1 = vld1_u16 (arr1); + uint16_t arr2[] = {4, 5, 6, 7}; + uint16x4_t in2 = vld1_u16 (arr2); + uint16_t exp[4]; + uint16x4_t expected; + uint16x4_t actual = test_vext_u16_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1_u16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u16_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1_u16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u16_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1_u16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c new file mode 100644 index 0000000..83b1f97 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextu16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x new file mode 100644 index 0000000..a5c3dd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x @@ -0,0 +1,30 @@ +extern void abort (void); + +uint32x2_t +test_vext_u32_1 (uint32x2_t a, uint32x2_t b) +{ + return vext_u32 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint32_t arr1[] = {0, 1}; + uint32x2_t in1 = vld1_u32 (arr1); + uint32_t arr2[] = {2, 3}; + uint32x2_t in2 = vld1_u32 (arr2); + uint32_t exp[2]; + uint32x2_t expected; + uint32x2_t actual = test_vext_u32_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 1; + expected = vld1_u32 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c new file mode 100644 index 0000000..6ed6508 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextu32' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x new file mode 100644 index 0000000..8d5072b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x @@ -0,0 +1,17 @@ +extern void abort (void); + +int +main (int argc, char **argv) +{ + int i, off; + uint64_t arr1[] = {0}; + uint64x1_t in1 = vld1_u64 (arr1); + uint64_t arr2[] = {1}; + uint64x1_t in2 = vld1_u64 (arr2); + uint64x1_t actual = vext_u64 (in1, in2, 0); + if (actual != in1) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c new file mode 100644 index 0000000..e6779f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c @@ -0,0 +1,11 @@ +/* Test the `vextu64' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u64.x" + +/* Do not scan-assembler. An EXT instruction could be emitted, but would merely + return its first argument, so it is legitimate to optimize it out. */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x b/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x new file mode 100644 index 0000000..4d58057 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x @@ -0,0 +1,114 @@ +extern void abort (void); + +uint8x8_t +test_vext_u8_1 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 1); +} + +uint8x8_t +test_vext_u8_2 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 2); +} + +uint8x8_t +test_vext_u8_3 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 3); +} + +uint8x8_t +test_vext_u8_4 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 4); +} + +uint8x8_t +test_vext_u8_5 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 5); +} + +uint8x8_t +test_vext_u8_6 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 6); +} + +uint8x8_t +test_vext_u8_7 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint8x8_t in1 = vld1_u8 (arr1); + uint8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + uint8x8_t in2 = vld1_u8 (arr2); + uint8_t exp[8]; + uint8x8_t expected; + uint8x8_t actual = test_vext_u8_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if 
(actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c new file mode 100644 index 0000000..8e91b61 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextu8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x new file mode 100644 index 0000000..b8059d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x @@ -0,0 +1,58 @@ +extern void abort (void); + +float32x4_t +test_vextq_f32_1 (float32x4_t a, float32x4_t b) +{ + return vextq_f32 (a, b, 1); +} + +float32x4_t +test_vextq_f32_2 (float32x4_t a, float32x4_t b) +{ + return vextq_f32 (a, b, 2); +} + +float32x4_t +test_vextq_f32_3 (float32x4_t a, float32x4_t b) +{ + return vextq_f32 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + float32_t arr1[] = {0, 1, 2, 3}; + float32x4_t in1 = vld1q_f32 (arr1); + float32_t arr2[] = {4, 5, 6, 7}; + float32x4_t in2 = vld1q_f32 (arr2); + float32_t exp[4]; + float32x4_t expected; + float32x4_t actual = test_vextq_f32_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1q_f32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_f32_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1q_f32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_f32_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1q_f32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c new file mode 100644 index 0000000..94bce1a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQf32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_f32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c new file mode 100644 index 0000000..c60b1f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c @@ -0,0 +1,36 @@ +/* Test the `vextq_f64' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +extern void abort (void); +#include <stdio.h> + +float64x2_t +test_vextq_f64_1 (float64x2_t a, float64x2_t b) +{ + return vextq_f64 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + float64_t arr1[] = {0, 1}; + float64x2_t in1 = vld1q_f64 (arr1); + float64_t arr2[] = {2, 3}; + float64x2_t in2 = vld1q_f64 (arr2); + float64_t exp[] = {1, 2}; + float64x2_t expected = vld1q_f64 (exp); + float64x2_t actual = test_vextq_f64_1 (in1, in2); + + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x new file mode 100644 index 0000000..9b7cf4e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x @@ -0,0 +1,114 @@ +extern void abort (void); + +poly16x8_t +test_vextq_p16_1 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 1); +} + +poly16x8_t +test_vextq_p16_2 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 2); +} + +poly16x8_t +test_vextq_p16_3 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 3); +} + +poly16x8_t +test_vextq_p16_4 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 4); +} + +poly16x8_t +test_vextq_p16_5 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 5); +} + +poly16x8_t +test_vextq_p16_6 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 6); +} + +poly16x8_t +test_vextq_p16_7 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + poly16x8_t in1 = vld1q_p16 (arr1); + poly16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + poly16x8_t in2 = vld1q_p16 (arr2); + poly16_t exp[8]; + poly16x8_t expected; + poly16x8_t actual = test_vextq_p16_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c new file
mode 100644 index 0000000..8f73eef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQp16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_p16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x new file mode 100644 index 0000000..1f53060 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x @@ -0,0 +1,227 @@ +extern void abort (void); + +poly8x16_t +test_vextq_p8_1 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 1); +} + +poly8x16_t +test_vextq_p8_2 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 2); +} + +poly8x16_t +test_vextq_p8_3 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 3); +} + +poly8x16_t +test_vextq_p8_4 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 4); +} + +poly8x16_t +test_vextq_p8_5 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 5); +} + +poly8x16_t +test_vextq_p8_6 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 6); +} + +poly8x16_t +test_vextq_p8_7 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 7); +} + +poly8x16_t +test_vextq_p8_8 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 8); +} + +poly8x16_t +test_vextq_p8_9 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 9); +} + +poly8x16_t +test_vextq_p8_10 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 10); +} + +poly8x16_t +test_vextq_p8_11 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 11); +} + +poly8x16_t +test_vextq_p8_12 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 12); +} + +poly8x16_t +test_vextq_p8_13 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 13); +} + +poly8x16_t +test_vextq_p8_14 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 14); +} + +poly8x16_t +test_vextq_p8_15 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 15); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + poly8x16_t in1 = vld1q_p8 (arr1); + poly8_t arr2[] = + {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + poly8x16_t in2 = vld1q_p8 (arr2); + poly8_t exp[16]; + poly8x16_t expected; + poly8x16_t actual = test_vextq_p8_1 (in1, in2); + + for (i = 0; i < 16; i++) + exp[i] = i + 1; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_2 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 2; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_3 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 3; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_4 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 4; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_5 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 5; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_6 (in1, in2); + for 
(i = 0; i < 16; i++) + exp[i] = i + 6; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_7 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 7; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_8 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 8; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_9 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 9; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_10 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 10; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_11 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 11; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_12 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 12; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_13 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 13; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_14 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 14; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_15 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 15; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c new file mode 100644 index 0000000..ea2d399 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQp8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_p8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x new file mode 100644 index 0000000..f5573c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x @@ -0,0 +1,114 @@ +extern void abort (void); + +int16x8_t +test_vextq_s16_1 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 1); +} + +int16x8_t +test_vextq_s16_2 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 2); +} + +int16x8_t +test_vextq_s16_3 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 3); +} + +int16x8_t +test_vextq_s16_4 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 4); +} + +int16x8_t +test_vextq_s16_5 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 5); +} + +int16x8_t +test_vextq_s16_6 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 6); +} + +int16x8_t +test_vextq_s16_7 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + int16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + int16x8_t in1 = vld1q_s16 (arr1); + int16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + int16x8_t in2 = vld1q_s16 (arr2); + int16_t exp[8]; + int16x8_t expected; + int16x8_t actual = test_vextq_s16_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c new file mode 100644 index 0000000..8b3d5fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x new file mode 100644 index 0000000..02cb78b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x @@ -0,0 +1,58 @@ +extern void abort (void); + +int32x4_t +test_vextq_s32_1 (int32x4_t a, int32x4_t b) +{ + return vextq_s32 (a, b, 1); +} + +int32x4_t +test_vextq_s32_2 (int32x4_t a, int32x4_t b) +{ + return vextq_s32 (a, b, 2); +} + +int32x4_t +test_vextq_s32_3 (int32x4_t a, int32x4_t b) +{ + return vextq_s32 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + int32_t arr1[] = {0, 1, 2, 3}; + int32x4_t in1 = vld1q_s32 (arr1); + int32_t arr2[] = {4, 5, 6, 7}; + int32x4_t in2 = vld1q_s32 (arr2); + int32_t exp[4]; + int32x4_t expected; + int32x4_t actual = test_vextq_s32_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1q_s32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s32_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1q_s32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s32_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1q_s32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c new file mode 100644 index 0000000..4f1b08b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x new file mode 100644 index 0000000..8068bf8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x @@ -0,0 +1,30 @@ +extern void abort (void); + +int64x2_t +test_vextq_s64_1 (int64x2_t a, int64x2_t b) +{ + return vextq_s64 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + int64_t arr1[] = {0, 1}; + int64x2_t in1 = vld1q_s64 (arr1); + int64_t arr2[] = {2, 3}; + int64x2_t in2 = vld1q_s64 (arr2); + int64_t exp[2]; + int64x2_t expected; + int64x2_t actual = test_vextq_s64_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 1; + expected = vld1q_s64 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c new file mode 100644 index 0000000..cb308fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs64' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s64.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x new file mode 100644 index 0000000..012a858 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x @@ -0,0 +1,227 @@ +extern void abort (void); + +int8x16_t +test_vextq_s8_1 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 1); +} + +int8x16_t +test_vextq_s8_2 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 2); +} + +int8x16_t +test_vextq_s8_3 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 3); +} + +int8x16_t +test_vextq_s8_4 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 4); +} + +int8x16_t +test_vextq_s8_5 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 5); +} + +int8x16_t +test_vextq_s8_6 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 6); +} + +int8x16_t +test_vextq_s8_7 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 7); +} + +int8x16_t +test_vextq_s8_8 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 8); +} + +int8x16_t +test_vextq_s8_9 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 9); +} + +int8x16_t +test_vextq_s8_10 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 10); +} + +int8x16_t +test_vextq_s8_11 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 11); +} + +int8x16_t +test_vextq_s8_12 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 12); +} + +int8x16_t +test_vextq_s8_13 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 13); +} + +int8x16_t +test_vextq_s8_14 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 14); +} + +int8x16_t +test_vextq_s8_15 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 15); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int8x16_t in1 = vld1q_s8 (arr1); + int8_t arr2[] = + {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + int8x16_t in2 = vld1q_s8 (arr2); + int8_t exp[16]; + int8x16_t expected; + int8x16_t actual = test_vextq_s8_1 (in1, in2); + + for (i = 0; i < 16; i++) + exp[i] = i + 1; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_2 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 2; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_3 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 3; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_4 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 4; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_5 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 5; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_6 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 6; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_7 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 
7; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_8 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 8; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_9 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 9; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_10 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 10; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_11 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 11; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_12 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 12; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_13 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 13; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_14 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 14; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_15 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 15; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c new file mode 100644 index 0000000..7e1e50a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x new file mode 100644 index 0000000..66bc9f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x @@ -0,0 +1,114 @@ +extern void abort (void); + +uint16x8_t +test_vextq_u16_1 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 1); +} + +uint16x8_t +test_vextq_u16_2 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 2); +} + +uint16x8_t +test_vextq_u16_3 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 3); +} + +uint16x8_t +test_vextq_u16_4 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 4); +} + +uint16x8_t +test_vextq_u16_5 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 5); +} + +uint16x8_t +test_vextq_u16_6 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 6); +} + +uint16x8_t +test_vextq_u16_7 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint16x8_t in1 = vld1q_u16 (arr1); + uint16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + uint16x8_t in2 = vld1q_u16 (arr2); + uint16_t exp[8]; + uint16x8_t expected; + uint16x8_t actual = test_vextq_u16_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c new file mode 100644 index 0000000..edf3efd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x new file mode 100644 index 0000000..61bec8b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x @@ -0,0 +1,58 @@ +extern void abort (void); + +uint32x4_t +test_vextq_u32_1 (uint32x4_t a, uint32x4_t b) +{ + return vextq_u32 (a, b, 1); +} + +uint32x4_t +test_vextq_u32_2 (uint32x4_t a, uint32x4_t b) +{ + return vextq_u32 (a, b, 2); +} + +uint32x4_t +test_vextq_u32_3 (uint32x4_t a, uint32x4_t b) +{ + return vextq_u32 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint32_t arr1[] = {0, 1, 2, 3}; + uint32x4_t in1 = vld1q_u32 (arr1); + uint32_t arr2[] = {4, 5, 6, 7}; + uint32x4_t in2 = vld1q_u32 (arr2); + uint32_t exp[4]; + uint32x4_t expected; + uint32x4_t actual = test_vextq_u32_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1q_u32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u32_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1q_u32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u32_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1q_u32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c new file mode 100644 index 0000000..f3eb56e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x new file mode 100644 index 0000000..e1bd267 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x @@ -0,0 +1,30 @@ +extern void abort (void); + +uint64x2_t +test_vextq_u64_1 (uint64x2_t a, uint64x2_t b) +{ + return vextq_u64 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint64_t arr1[] = {0, 1}; + uint64x2_t in1 = vld1q_u64 (arr1); + uint64_t arr2[] = {2, 3}; + uint64x2_t in2 = vld1q_u64 (arr2); + uint64_t exp[2]; + uint64x2_t expected; + uint64x2_t actual = test_vextq_u64_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 1; + expected = vld1q_u64 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c new file mode 100644 index 0000000..d999c2c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu64' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u64.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x b/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x new file mode 100644 index 0000000..2b25dec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x @@ -0,0 +1,227 @@ +extern void abort (void); + +uint8x16_t +test_vextq_u8_1 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 1); +} + +uint8x16_t +test_vextq_u8_2 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 2); +} + +uint8x16_t +test_vextq_u8_3 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 3); +} + +uint8x16_t +test_vextq_u8_4 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 4); +} + +uint8x16_t +test_vextq_u8_5 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 5); +} + +uint8x16_t +test_vextq_u8_6 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 6); +} + +uint8x16_t +test_vextq_u8_7 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 7); +} + +uint8x16_t +test_vextq_u8_8 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 8); +} + +uint8x16_t +test_vextq_u8_9 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 9); +} + +uint8x16_t +test_vextq_u8_10 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 10); +} + +uint8x16_t +test_vextq_u8_11 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 11); +} + +uint8x16_t +test_vextq_u8_12 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 12); +} + +uint8x16_t +test_vextq_u8_13 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 13); +} + +uint8x16_t +test_vextq_u8_14 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 14); +} + +uint8x16_t +test_vextq_u8_15 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 15); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + uint8x16_t in1 = vld1q_u8 (arr1); + uint8_t arr2[] = + {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + uint8x16_t in2 = vld1q_u8 (arr2); + uint8_t exp[16]; + uint8x16_t expected; + uint8x16_t actual = test_vextq_u8_1 (in1, in2); + + for (i = 0; i < 16; i++) + exp[i] = i + 1; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_2 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 2; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_3 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 3; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_4 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 4; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_5 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 5; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_6 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 6; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_7 
(in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 7; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_8 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 8; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_9 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 9; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_10 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 10; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_11 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 11; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_12 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 12; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_13 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 13; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_14 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 14; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_15 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 15; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c b/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c new file mode 100644 index 0000000..30df9ea --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c new file mode 100644 index 0000000..c1da6d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_f32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c new file mode 100644 index 0000000..adc0861 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQp16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_p16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c new file mode 100644 index 0000000..e8b688d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c @@ -0,0 +1,33 @@ +/* Test the `vextQp64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +extern void abort (void); + +poly64x2_t +test_vextq_p64_1 (poly64x2_t a, poly64x2_t b) +{ + return vextq_p64(a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly64x2_t in1 = {0, 1}; + poly64x2_t in2 = {2, 3}; + poly64x2_t actual = test_vextq_p64_1 (in1, in2); + for (i = 0; i < 2; i++) + if (actual[i] != i + 1) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c new file mode 100644 index 0000000..5f2cc53 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_p8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c new file mode 100644 index 0000000..c0d791d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c new file mode 100644 index 0000000..ed5b210 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c new file mode 100644 index 0000000..dbbee47 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s64.x" + +/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c new file mode 100644 index 0000000..0ebdce3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c new file mode 100644 index 0000000..136f2b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c new file mode 100644 index 0000000..66ce035 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c new file mode 100644 index 0000000..ebe4abd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u64.x" + +/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c b/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c new file mode 100644 index 0000000..432ac0a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c b/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c new file mode 100644 index 0000000..99e0bad --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_f32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c b/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c new file mode 100644 index 0000000..00695bf --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextp16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_p16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c b/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c new file mode 100644 index 0000000..8783e16 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c @@ -0,0 +1,26 @@ +/* Test the `vextp64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (int argc, char **argv) +{ + int i; + poly64x1_t in1 = {0}; + poly64x1_t in2 = {1}; + poly64x1_t actual = vext_p64 (in1, in2, 0); + if (actual != in1) + abort (); + + return 0; +} + +/* Don't scan assembler for vext - it can be optimized into a move from r0. */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c b/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c new file mode 100644 index 0000000..2ba72c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_p8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c b/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c new file mode 100644 index 0000000..4fa57d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c b/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c new file mode 100644 index 0000000..3cd5936 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts32' ARM Neon intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c b/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c new file mode 100644 index 0000000..7bb2012 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s64.x" + +/* Don't scan assembler for vext - it can be optimized into a move from r0. */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c b/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c new file mode 100644 index 0000000..194e198 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c b/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c new file mode 100644 index 0000000..f69c2bd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c b/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c new file mode 100644 index 0000000..b76e383 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c b/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c new file mode 100644 index 0000000..39ffc56 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu64' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u64.x" + +/* Don't scan assembler for vext - it can be optimized into a move from r0. */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c b/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c new file mode 100644 index 0000000..a9d62b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ -- 2.7.4
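For reference, every test above pins down the same EXT/VEXT semantics: vext(q)_*(a, b, n) selects a LANES-wide window starting at lane n of the concatenation a:b, so lane i of the result is a[i + n] when i + n < LANES, and b[i + n - LANES] otherwise. The following is a minimal standalone sketch of that model checked against one representative intrinsic call; the file name ext_demo.c is hypothetical and this code is not part of the patch (it assumes an AArch64 toolchain, e.g. aarch64-linux-gnu-gcc -O2 ext_demo.c).

/* ext_demo.c -- hypothetical file name; NOT one of the files added by
   this patch.  Checks vextq_u8 against a scalar model of the EXT
   window semantics for one representative lane index.  */

#include <arm_neon.h>
#include <stdint.h>
#include <stdlib.h>

int
main (void)
{
  uint8_t arr1[16], arr2[16];
  uint8x16_t a, b, r;
  uint8_t want;
  const int n = 5;  /* Representative index; any constant 0..15 works.  */
  int i;

  for (i = 0; i < 16; i++)
    {
      arr1[i] = i;       /* Lanes 0..15, as in extq_u8.x above.  */
      arr2[i] = i + 16;  /* Lanes 16..31.  */
    }

  a = vld1q_u8 (arr1);
  b = vld1q_u8 (arr2);

  /* The intrinsic's lane argument must be a compile-time constant, so
     it is written as a literal rather than as `n'.  */
  r = vextq_u8 (a, b, 5);

  for (i = 0; i < 16; i++)
    {
      /* Scalar model: 16-lane window starting at lane n of a:b.  */
      want = (i + n < 16) ? arr1[i + n] : arr2[i + n - 16];
      if (r[i] != want)  /* GCC permits [] subscripting on vector types.  */
        abort ();
    }

  return 0;
}

The lane index must be an integer constant expression, which is why the .x files above generate a separate test_* function per immediate value rather than looping over n; it is also why, for the one-lane u64/p64 variants, the only legal index is 0, where EXT degenerates to returning its first operand and may legitimately be optimized away, as the comments in ext_u64_1.c and vextp64_1.c note.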