From 0f9d2c1a318ed30a66b75bd6b7fa3dc3630e362e Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 7 Jan 2021 15:00:39 +0000 Subject: [PATCH] aarch64: Support conditional unpacked integer unary arithmetic on SVE This patch extends the conditional unary integer operations from SVE_FULL_I to SVE_I. In each case the type suffix is taken from the element size rather than the container size: this matters for ABS and NEG, but doesn't matter for NOT. gcc/ * config/aarch64/aarch64-sve.md (@cond_<optab><mode>) (*cond_<optab><mode>_2): Extend from SVE_FULL_I to SVE_I. (*cond_<optab><mode>_any): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/cond_unary_5.c: New test. * gcc.target/aarch64/sve/cond_unary_5_run.c: Likewise. * gcc.target/aarch64/sve/cond_unary_6.c: Likewise. * gcc.target/aarch64/sve/cond_unary_6_run.c: Likewise. * gcc.target/aarch64/sve/cond_unary_7.c: Likewise. * gcc.target/aarch64/sve/cond_unary_7_run.c: Likewise. * gcc.target/aarch64/sve/cond_unary_8.c: Likewise. * gcc.target/aarch64/sve/cond_unary_8_run.c: Likewise. 
--- gcc/config/aarch64/aarch64-sve.md | 28 ++++++------ .../gcc.target/aarch64/sve/cond_unary_5.c | 49 ++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_unary_5_run.c | 26 +++++++++++ .../gcc.target/aarch64/sve/cond_unary_6.c | 53 ++++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_unary_6_run.c | 27 +++++++++++ .../gcc.target/aarch64/sve/cond_unary_7.c | 48 ++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_unary_7_run.c | 26 +++++++++++ .../gcc.target/aarch64/sve/cond_unary_8.c | 50 ++++++++++++++++++++ .../gcc.target/aarch64/sve/cond_unary_8_run.c | 28 ++++++++++++ 9 files changed, 321 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8_run.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 2be05ee..2ec9acb 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -2940,23 +2940,23 @@ ;; Predicated integer unary arithmetic with merging. 
(define_expand "@cond_" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I [(match_operand: 1 "register_operand") - (SVE_INT_UNARY:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand")) - (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand")) + (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE" ) ;; Predicated integer unary arithmetic, merging with the first input. (define_insn "*cond__2" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand: 1 "register_operand" "Upl, Upl") - (SVE_INT_UNARY:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "0, w")) + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w")) (match_dup 2)] UNSPEC_SEL))] "TARGET_SVE" @@ -2974,12 +2974,12 @@ ;; as earlyclobber helps to make the instruction more regular to the ;; register allocator. 
(define_insn "*cond__any" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_I [(match_operand: 1 "register_operand" "Upl, Upl, Upl") - (SVE_INT_UNARY:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")) - (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, w")) + (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" "@ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c new file mode 100644 index 0000000..17b3f86 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define abs(A) ((A) < 0 ? -(A) : (A)) +#define neg(A) (-(A)) +#define not(A) (~(A)) + +#define DEF_LOOP(TYPE1, TYPE2, COUNT, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2##_##OP (TYPE2 *__restrict r, \ + TYPE2 *__restrict a, \ + TYPE1 *__restrict pred) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = pred[i] ? 
OP (a[i]) : a[i]; \ + } + +#define TEST_TYPES(T, TYPE1, TYPE2, COUNT) \ + T (TYPE1, TYPE2, COUNT, abs) \ + T (TYPE1, TYPE2, COUNT, neg) \ + T (TYPE1, TYPE2, COUNT, not) + +#define TEST_ALL(T) \ + TEST_TYPES (T, int16_t, int8_t, 7) \ + TEST_TYPES (T, int32_t, int8_t, 3) \ + TEST_TYPES (T, int32_t, int16_t, 3) \ + TEST_TYPES (T, int64_t, int8_t, 5) \ + TEST_TYPES (T, int64_t, int16_t, 5) \ + TEST_TYPES (T, int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5_run.c new file mode 100644 index 0000000..d6d5526 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_unary_5.c" + +#define TEST_LOOP(TYPE1, TYPE2, N, OP) \ + { \ + TYPE1 pred[N]; \ + TYPE2 r[N], a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 
1 : -1); \ + pred[i] = (i % 4 < 2); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2##_##OP (r, a, pred); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (pred[i] ? OP (a[i]) : a[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c new file mode 100644 index 0000000..1bd342b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define abs(A) ((A) < 0 ? -(A) : (A)) +#define neg(A) (-(A)) +#define not(A) (~(A)) + +#define DEF_LOOP(TYPE1, TYPE2, COUNT, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2##_##OP (TYPE2 *__restrict r, \ + TYPE2 *__restrict a, \ + TYPE2 *__restrict b, \ + TYPE1 *__restrict pred) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + { \ + TYPE2 bi = b[i]; \ + r[i] = pred[i] ? 
OP (a[i]) : bi; \ + } \ + } + +#define TEST_TYPES(T, TYPE1, TYPE2, COUNT) \ + T (TYPE1, TYPE2, COUNT, abs) \ + T (TYPE1, TYPE2, COUNT, neg) \ + T (TYPE1, TYPE2, COUNT, not) + +#define TEST_ALL(T) \ + TEST_TYPES (T, int16_t, int8_t, 7) \ + TEST_TYPES (T, int32_t, int8_t, 3) \ + TEST_TYPES (T, int32_t, int16_t, 3) \ + TEST_TYPES (T, int64_t, int8_t, 5) \ + TEST_TYPES (T, int64_t, int16_t, 5) \ + TEST_TYPES (T, int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6_run.c new file mode 100644 index 0000000..04e4bd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_unary_6.c" + +#define TEST_LOOP(TYPE1, TYPE2, N, OP) \ + { \ + TYPE1 pred[N]; \ + TYPE2 r[N], a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 
1 : -1); \ + b[i] = (i % 5) * (i % 6 + 3); \ + pred[i] = (i % 4 < 2); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2##_##OP (r, a, b, pred); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (pred[i] ? OP (a[i]) : b[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7.c new file mode 100644 index 0000000..0e11821 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define abs(A) ((A) < 0 ? -(A) : (A)) +#define neg(A) (-(A)) +#define not(A) (~(A)) + +#define DEF_LOOP(TYPE1, TYPE2, COUNT, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2##_##OP (TYPE2 *__restrict r, \ + TYPE2 *__restrict a, \ + TYPE1 *__restrict pred) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = pred[i] ? OP (a[i]) : 5; \ + } + +#define TEST_TYPES(T, TYPE1, TYPE2, COUNT) \ + T (TYPE1, TYPE2, COUNT, abs) \ + T (TYPE1, TYPE2, COUNT, neg) \ + T (TYPE1, TYPE2, COUNT, not) + +#define TEST_ALL(T) \ + TEST_TYPES (T, int16_t, int8_t, 7) \ + TEST_TYPES (T, int32_t, int8_t, 3) \ + TEST_TYPES (T, int32_t, int16_t, 3) \ + TEST_TYPES (T, int64_t, int8_t, 5) \ + TEST_TYPES (T, int64_t, int16_t, 5) \ + TEST_TYPES (T, int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ 
+/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7_run.c new file mode 100644 index 0000000..4460a79 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_unary_7.c" + +#define TEST_LOOP(TYPE1, TYPE2, N, OP) \ + { \ + TYPE1 pred[N]; \ + TYPE2 r[N], a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ + pred[i] = (i % 4 < 2); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2##_##OP (r, a, pred); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (pred[i] ? OP (a[i]) : 5)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8.c new file mode 100644 index 0000000..96c53b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include + +#define abs(A) ((A) < 0 ? -(A) : (A)) +#define neg(A) (-(A)) +#define not(A) (~(A)) + +#define DEF_LOOP(TYPE1, TYPE2, COUNT, OP) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2##_##OP (TYPE2 *__restrict r, \ + TYPE2 *__restrict a, \ + TYPE1 *__restrict pred) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = pred[i] ? 
OP (a[i]) : 0; \ + } + +#define TEST_TYPES(T, TYPE1, TYPE2, COUNT) \ + T (TYPE1, TYPE2, COUNT, abs) \ + T (TYPE1, TYPE2, COUNT, neg) \ + T (TYPE1, TYPE2, COUNT, not) + +#define TEST_ALL(T) \ + TEST_TYPES (T, int16_t, int8_t, 7) \ + TEST_TYPES (T, int32_t, int8_t, 3) \ + TEST_TYPES (T, int32_t, int16_t, 3) \ + TEST_TYPES (T, int64_t, int8_t, 5) \ + TEST_TYPES (T, int64_t, int16_t, 5) \ + TEST_TYPES (T, int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler {\tmovprfx\tz[^,]*, p[0-7]/z} } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8_run.c new file mode 100644 index 0000000..55676d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_unary_8.c" + +#define N 99 + +#define TEST_LOOP(TYPE1, TYPE2, N, OP) \ + { \ + TYPE1 pred[N]; \ + TYPE2 r[N], a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 
1 : -1); \ + pred[i] = (i % 4 < 2); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2##_##OP (r, a, pred); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (pred[i] ? OP (a[i]) : 0)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} -- 2.7.4