From 20103c0ea9336d2b5286eb7f2605ace3fd49a431 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 15 Aug 2019 08:05:50 +0000 Subject: [PATCH] Add support for conditional shifts This patch adds support for IFN_COND shifts left and shifts right. This is mostly mechanical, but since we try to handle conditional operations in the same way as unconditional operations in match.pd, we need to support IFN_COND shifts by scalars as well as vectors. E.g.: IFN_COND_SHL (cond, a, { 1, 1, ... }, fallback) and: IFN_COND_SHL (cond, a, 1, fallback) are the same operation, with: (for shiftrotate (lrotate rrotate lshift rshift) ... /* Prefer vector1 << scalar to vector1 << vector2 if vector2 is uniform. */ (for vec (VECTOR_CST CONSTRUCTOR) (simplify (shiftrotate @0 vec@1) (with { tree tem = uniform_vector_p (@1); } (if (tem) (shiftrotate @0 { tem; })))))) preferring the latter. The patch copes with this by extending create_convert_operand_from to handle scalar-to-vector conversions. 2019-08-15 Richard Sandiford Prathamesh Kulkarni gcc/ * internal-fn.def (IFN_COND_SHL, IFN_COND_SHR): New internal functions. * internal-fn.c (FOR_EACH_CODE_MAPPING): Handle shifts. * match.pd (UNCOND_BINARY, COND_BINARY): Likewise. * optabs.def (cond_ashl_optab, cond_ashr_optab, cond_lshr_optab): New optabs. * optabs.h (create_convert_operand_from): Expand comment. * optabs.c (maybe_legitimize_operand): Allow implicit broadcasts when mapping scalar rtxes to vector operands. * config/aarch64/iterators.md (SVE_INT_BINARY): Add ashift, ashiftrt and lshiftrt. (sve_int_op, sve_int_op_rev, sve_pred_int_rhs2_operand): Handle them. * config/aarch64/aarch64-sve.md (*cond__2_const) (*cond__any_const): New patterns. gcc/testsuite/ * gcc.target/aarch64/sve/cond_shift_1.c: New test. * gcc.target/aarch64/sve/cond_shift_1_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_2.c: Likewise. * gcc.target/aarch64/sve/cond_shift_2_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_3.c: Likewise. * gcc.target/aarch64/sve/cond_shift_3_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_4.c: Likewise. * gcc.target/aarch64/sve/cond_shift_4_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_5.c: Likewise. * gcc.target/aarch64/sve/cond_shift_5_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_6.c: Likewise. * gcc.target/aarch64/sve/cond_shift_6_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_7.c: Likewise. * gcc.target/aarch64/sve/cond_shift_7_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_8.c: Likewise. * gcc.target/aarch64/sve/cond_shift_8_run.c: Likewise. * gcc.target/aarch64/sve/cond_shift_9.c: Likewise. * gcc.target/aarch64/sve/cond_shift_9_run.c: Likewise. Co-Authored-By: Prathamesh Kulkarni From-SVN: r274505 --- gcc/ChangeLog | 17 +++++++ gcc/config/aarch64/aarch64-sve.md | 46 +++++++++++++++++++ gcc/config/aarch64/iterators.md | 32 ++++++++----- gcc/internal-fn.c | 4 +- gcc/internal-fn.def | 4 ++ gcc/match.pd | 6 ++- gcc/optabs.c | 12 ++++- gcc/optabs.def | 3 ++ gcc/optabs.h | 6 ++- gcc/testsuite/ChangeLog | 22 +++++++++ .../gcc.target/aarch64/sve/cond_shift_1.c | 48 ++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_1_run.c | 27 +++++++++++ .../gcc.target/aarch64/sve/cond_shift_2.c | 52 ++++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_2_run.c | 27 +++++++++++ .../gcc.target/aarch64/sve/cond_shift_3.c | 48 ++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_3_run.c | 27 +++++++++++ .../gcc.target/aarch64/sve/cond_shift_4.c | 52 ++++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_4_run.c | 27 +++++++++++ .../gcc.target/aarch64/sve/cond_shift_5.c | 38 ++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_5_run.c | 28 ++++++++++++ .../gcc.target/aarch64/sve/cond_shift_6.c | 33 ++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_6_run.c | 28 ++++++++++++ .../gcc.target/aarch64/sve/cond_shift_7.c | 40 +++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_7_run.c | 28 ++++++++++++ .../gcc.target/aarch64/sve/cond_shift_8.c | 38 ++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_8_run.c | 28 ++++++++++++ .../gcc.target/aarch64/sve/cond_shift_9.c | 40 +++++++++++++++++ .../gcc.target/aarch64/sve/cond_shift_9_run.c | 28 ++++++++++++ 28 files changed, 772 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 66de492..e6c3d70 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2019-08-15 Richard Sandiford + Prathamesh Kulkarni + + * internal-fn.def (IFN_COND_SHL, IFN_COND_SHR): New internal functions. + * internal-fn.c (FOR_EACH_CODE_MAPPING): Handle shifts. + * match.pd (UNCOND_BINARY, COND_BINARY): Likewise. + * optabs.def (cond_ashl_optab, cond_ashr_optab, cond_lshr_optab): New + optabs. + * optabs.h (create_convert_operand_from): Expand comment. + * optabs.c (maybe_legitimize_operand): Allow implicit broadcasts + when mapping scalar rtxes to vector operands. + * config/aarch64/iterators.md (SVE_INT_BINARY): Add ashift, + ashiftrt and lshiftrt. + (sve_int_op, sve_int_op_rev, sve_pred_int_rhs2_operand): Handle them. + * config/aarch64/aarch64-sve.md (*cond__2_const) + (*cond__any_const): New patterns. + 2019-08-15 Martin Liska PR ipa/91438 diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 43beb7e..b8156a1 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1772,7 +1772,10 @@ ;; Includes: ;; - ADD (merging form only) ;; - AND (merging form only) +;; - ASR (merging form only) ;; - EOR (merging form only) +;; - LSL (merging form only) +;; - LSR (merging form only) ;; - MUL ;; - ORR (merging form only) ;; - SMAX @@ -2405,6 +2408,49 @@ "\t%0., %1., #%2" ) +;; Predicated integer shift, merging with the first input. +(define_insn "*cond__2_const" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (ASHIFT:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_shift_imm")) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" + [(set_attr "movprfx" "*,yes")] +) + +;; Predicated integer shift, merging with an independent value. +(define_insn_and_rewrite "*cond__any_const" + [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (ASHIFT:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, w") + (match_operand:SVE_I 3 "aarch64_simd_shift_imm")) + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] General binary arithmetic corresponding to rtx codes ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 3642ba1..81dddb7 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1280,6 +1280,7 @@ ;; SVE integer binary operations. (define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin + ashift ashiftrt lshiftrt and ior xor]) ;; SVE integer binary division operations. @@ -1475,6 +1476,9 @@ (smax "smax") (umin "umin") (umax "umax") + (ashift "lsl") + (ashiftrt "asr") + (lshiftrt "lsr") (and "and") (ior "orr") (xor "eor") @@ -1484,17 +1488,20 @@ (popcount "cnt")]) (define_code_attr sve_int_op_rev [(plus "add") - (minus "subr") - (mult "mul") - (div "sdivr") - (udiv "udivr") - (smin "smin") - (smax "smax") - (umin "umin") - (umax "umax") - (and "and") - (ior "orr") - (xor "eor")]) + (minus "subr") + (mult "mul") + (div "sdivr") + (udiv "udivr") + (smin "smin") + (smax "smax") + (umin "umin") + (umax "umax") + (ashift "lslr") + (ashiftrt "asrr") + (lshiftrt "lsrr") + (and "and") + (ior "orr") + (xor "eor")]) ;; The floating-point SVE instruction that implements an rtx code. (define_code_attr sve_fp_op [(plus "fadd") @@ -1535,6 +1542,9 @@ (umax "register_operand") (smin "register_operand") (umin "register_operand") + (ashift "aarch64_sve_lshift_operand") + (ashiftrt "aarch64_sve_rshift_operand") + (lshiftrt "aarch64_sve_rshift_operand") (and "aarch64_sve_pred_and_operand") (ior "register_operand") (xor "register_operand")]) diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 1067376..ad86b9a 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -3286,7 +3286,9 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = { T (MAX_EXPR, IFN_COND_MAX) \ T (BIT_AND_EXPR, IFN_COND_AND) \ T (BIT_IOR_EXPR, IFN_COND_IOR) \ - T (BIT_XOR_EXPR, IFN_COND_XOR) + T (BIT_XOR_EXPR, IFN_COND_XOR) \ + T (LSHIFT_EXPR, IFN_COND_SHL) \ + T (RSHIFT_EXPR, IFN_COND_SHR) /* Return a function that only performs CODE when a certain condition is met and that uses a given fallback value otherwise. For example, if CODE is diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 906d74b..9461693 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -167,6 +167,10 @@ DEF_INTERNAL_OPTAB_FN (COND_IOR, ECF_CONST | ECF_NOTHROW, cond_ior, cond_binary) DEF_INTERNAL_OPTAB_FN (COND_XOR, ECF_CONST | ECF_NOTHROW, cond_xor, cond_binary) +DEF_INTERNAL_OPTAB_FN (COND_SHL, ECF_CONST | ECF_NOTHROW, + cond_ashl, cond_binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (COND_SHR, ECF_CONST | ECF_NOTHROW, first, + cond_ashr, cond_lshr, cond_binary) DEF_INTERNAL_OPTAB_FN (COND_FMA, ECF_CONST, cond_fma, cond_ternary) DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary) diff --git a/gcc/match.pd b/gcc/match.pd index 0317bc7..93dcef9 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -83,12 +83,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) plus minus mult trunc_div trunc_mod rdiv min max - bit_and bit_ior bit_xor) + bit_and bit_ior bit_xor + lshift rshift) (define_operator_list COND_BINARY IFN_COND_ADD IFN_COND_SUB IFN_COND_MUL IFN_COND_DIV IFN_COND_MOD IFN_COND_RDIV IFN_COND_MIN IFN_COND_MAX - IFN_COND_AND IFN_COND_IOR IFN_COND_XOR) + IFN_COND_AND IFN_COND_IOR IFN_COND_XOR + IFN_COND_SHL IFN_COND_SHR) /* Same for ternary operations. */ (define_operator_list UNCOND_TERNARY diff --git a/gcc/optabs.c b/gcc/optabs.c index 06bcaab..9e54dda 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -7212,7 +7212,7 @@ static bool maybe_legitimize_operand (enum insn_code icode, unsigned int opno, class expand_operand *op) { - machine_mode mode, imode; + machine_mode mode, imode, tmode; mode = op->mode; switch (op->type) @@ -7259,9 +7259,17 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno, gcc_assert (mode != VOIDmode); imode = insn_data[(int) icode].operand[opno].mode; + tmode = (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode) + ? GET_MODE_INNER (imode) : imode); + if (tmode != VOIDmode && tmode != mode) + { + op->value = convert_modes (tmode, mode, op->value, op->unsigned_p); + mode = tmode; + } if (imode != VOIDmode && imode != mode) { - op->value = convert_modes (imode, mode, op->value, op->unsigned_p); + gcc_assert (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode)); + op->value = expand_vector_broadcast (imode, op->value); mode = imode; } goto input; diff --git a/gcc/optabs.def b/gcc/optabs.def index 4ffd0f3..5283e67 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -230,6 +230,9 @@ OPTAB_D (cond_umod_optab, "cond_umod$a") OPTAB_D (cond_and_optab, "cond_and$a") OPTAB_D (cond_ior_optab, "cond_ior$a") OPTAB_D (cond_xor_optab, "cond_xor$a") +OPTAB_D (cond_ashl_optab, "cond_ashl$a") +OPTAB_D (cond_ashr_optab, "cond_ashr$a") +OPTAB_D (cond_lshr_optab, "cond_lshr$a") OPTAB_D (cond_smin_optab, "cond_smin$a") OPTAB_D (cond_smax_optab, "cond_smax$a") OPTAB_D (cond_umin_optab, "cond_umin$a") diff --git a/gcc/optabs.h b/gcc/optabs.h index 0654107..897bb5d 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -129,7 +129,11 @@ create_convert_operand_to (class expand_operand *op, rtx value, /* Make OP describe an input operand that should have the same value as VALUE, after any mode conversion that the backend might request. If VALUE is a CONST_INT, it should be treated as having mode MODE. - UNSIGNED_P says whether VALUE is unsigned. */ + UNSIGNED_P says whether VALUE is unsigned. + + The conversion of VALUE can include a combination of numerical + conversion (as for convert_modes) and duplicating a scalar to fill + a vector (if VALUE is a scalar but the operand is a vector). */ static inline void create_convert_operand_from (class expand_operand *op, rtx value, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c736378..eed83ce 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,25 @@ +2019-08-15 Richard Sandiford + Prathamesh Kulkarni + + * gcc.target/aarch64/sve/cond_shift_1.c: New test. + * gcc.target/aarch64/sve/cond_shift_1_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_2.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_2_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_3.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_3_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_4.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_4_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_5.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_5_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_6.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_6_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_7.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_7_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_8.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_8_run.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_9.c: Likewise. + * gcc.target/aarch64/sve/cond_shift_9_run.c: Likewise. + 2019-08-14 Martin Sebor PR testsuite/91449 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c new file mode 100644 index 0000000..f2c51b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP 3 : b[i]; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int8_t) \ + TEST_TYPE (T, uint8_t) \ + TEST_TYPE (T, int16_t) \ + TEST_TYPE (T, uint16_t) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c new file mode 100644 index 0000000..acc403e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_1.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : b[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c new file mode 100644 index 0000000..c9082c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP 3 : a[i]; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int8_t) \ + TEST_TYPE (T, uint8_t) \ + TEST_TYPE (T, int16_t) \ + TEST_TYPE (T, uint16_t) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c new file mode 100644 index 0000000..4917d3a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_2.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : a[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c new file mode 100644 index 0000000..55e0de8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP 3 : 72; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int8_t) \ + TEST_TYPE (T, uint8_t) \ + TEST_TYPE (T, int16_t) \ + TEST_TYPE (T, uint16_t) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-times {\tsel\t} 16 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c new file mode 100644 index 0000000..194c75b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_3.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 72)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c new file mode 100644 index 0000000..32dd681 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP 3 : 0; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int8_t) \ + TEST_TYPE (T, uint8_t) \ + TEST_TYPE (T, int16_t) \ + TEST_TYPE (T, uint16_t) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c new file mode 100644 index 0000000..ee26300 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_4.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 0)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c new file mode 100644 index 0000000..1d44915 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, TYPE *__restrict c, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP c[i] : b[i]; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c new file mode 100644 index 0000000..35bf1b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_5.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N], c[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + c[i] = ~i & 7; \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, c, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : b[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c new file mode 100644 index 0000000..35cb676 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, TYPE *__restrict c, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP c[i] : c[i]; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlslr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c new file mode 100644 index 0000000..e601c61 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_6.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N], c[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + c[i] = ~i & 7; \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, c, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : c[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c new file mode 100644 index 0000000..80154b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, TYPE *__restrict c, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP c[i] : a[i]; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c new file mode 100644 index 0000000..d23b009 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_7.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N], c[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + c[i] = ~i & 7; \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, c, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : a[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c new file mode 100644 index 0000000..b478c0c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, TYPE *__restrict c, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP c[i] : 91; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c new file mode 100644 index 0000000..72e5a7b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_8.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N], c[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + c[i] = ~i & 7; \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, c, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 91)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c new file mode 100644 index 0000000..184e93a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define DEF_LOOP(TYPE, NAME, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ + TYPE *__restrict b, TYPE *__restrict c, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + r[i] = a[i] > 20 ? b[i] OP c[i] : 0; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, shl, <<) \ + T (TYPE, shr, >>) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c new file mode 100644 index 0000000..6e41ac4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_shift_9.c" + +#define N 99 + +#define TEST_LOOP(TYPE, NAME, OP) \ + { \ + TYPE r[N], a[N], b[N], c[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i); \ + b[i] = (i >> 4) << (i & 15); \ + c[i] = ~i & 7; \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##NAME (r, a, b, c, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 0)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} -- 2.7.4