From bca5a9971f47cf5fe79e6595beb762539f200f46 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 14 Aug 2019 09:02:47 +0000 Subject: [PATCH] [AArch64] Add support for SVE CLS and CLZ This patch adds support for unpredicated SVE CLS and CLZ. A later patch will add support for predicated unary integer arithmetic. 2019-08-14 Richard Sandiford gcc/ * config/aarch64/iterators.md (SVE_INT_UNARY): Add clrsb and clz. (optab, sve_int_op): Handle them. * config/aarch64/aarch64-sve.md: Expand comment. gcc/testsuite/ * gcc.target/aarch64/vect-clz.c: Force SVE off. * gcc.target/aarch64/sve/clrsb_1.c: New test. * gcc.target/aarch64/sve/clrsb_1_run.c: Likewise. * gcc.target/aarch64/sve/clz_1.c: Likewise. * gcc.target/aarch64/sve/clz_1_run.c: Likewise. From-SVN: r274437 --- gcc/ChangeLog | 6 +++ gcc/config/aarch64/aarch64-sve.md | 2 + gcc/config/aarch64/iterators.md | 6 ++- gcc/testsuite/ChangeLog | 8 ++++ gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c | 22 ++++++++++ gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c | 50 ++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/sve/clz_1.c | 22 ++++++++++ gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c | 50 ++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/vect-clz.c | 2 + 9 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clz_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c325192..b5b4414 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,11 @@ 2019-08-14 Richard Sandiford + * config/aarch64/iterators.md (SVE_INT_UNARY): Add clrsb and clz. + (optab, sve_int_op): Handle them. + * config/aarch64/aarch64-sve.md: Expand comment. + +2019-08-14 Richard Sandiford + * config/aarch64/predicates.md (const_1_to_3_operand): New predicate. 
* config/aarch64/aarch64-sve.md (*aarch64_adr_uxtw) (*aarch64_adr_shift, *aarch64_adr_shift_uxtw): New patterns. diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 6fed4ef..7e696d8 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1422,6 +1422,8 @@ ;; ------------------------------------------------------------------------- ;; Includes: ;; - ABS +;; - CLS (= clrsb) +;; - CLZ ;; - CNT (= popcount) ;; - NEG ;; - NOT diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 75e672b..bc5b96a 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1276,7 +1276,7 @@ (define_code_iterator FAC_COMPARISONS [lt le ge gt]) ;; SVE integer unary operations. -(define_code_iterator SVE_INT_UNARY [abs neg not popcount]) +(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount]) ;; SVE integer binary operations. (define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin @@ -1307,6 +1307,8 @@ (unsigned_fix "fixuns") (float "float") (unsigned_float "floatuns") + (clrsb "clrsb") + (clz "clz") (popcount "popcount") (and "and") (ior "ior") @@ -1474,6 +1476,8 @@ (ior "orr") (xor "eor") (not "not") + (clrsb "cls") + (clz "clz") (popcount "cnt")]) (define_code_attr sve_int_op_rev [(plus "add") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c33667e..1a2d800 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,13 @@ 2019-08-14 Richard Sandiford + * gcc.target/aarch64/vect-clz.c: Force SVE off. + * gcc.target/aarch64/sve/clrsb_1.c: New test. + * gcc.target/aarch64/sve/clrsb_1_run.c: Likewise. + * gcc.target/aarch64/sve/clz_1.c: Likewise. + * gcc.target/aarch64/sve/clz_1_run.c: Likewise. + +2019-08-14 Richard Sandiford + * gcc.target/aarch64/sve/adr_1.c: New test. * gcc.target/aarch64/sve/adr_1_run.c: Likewise. * gcc.target/aarch64/sve/adr_2.c: Likewise. 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c new file mode 100644 index 0000000..bdc9856 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c @@ -0,0 +1,22 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +clrsb_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) +{ + for (int i = 0; i < size; ++i) + dst[i] = __builtin_clrsb (src[i]); +} + +void __attribute__ ((noinline, noclone)) +clrsb_64 (unsigned int *restrict dst, uint64_t *restrict src, int size) +{ + for (int i = 0; i < size; ++i) + dst[i] = __builtin_clrsbll (src[i]); +} + +/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c new file mode 100644 index 0000000..287630d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c @@ -0,0 +1,50 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "clrsb_1.c" + +extern void abort (void) __attribute__ ((noreturn)); + +unsigned int data[] = { + 0xffffff80, 24, + 0xffffffff, 31, + 0x00000000, 31, + 0x80000000, 0, + 0x7fffffff, 0, + 0x000003ff, 21, + 0x1fffffff, 2, + 0x0000ffff, 15, + 0xffff0000, 15 +}; + +int __attribute__ ((optimize (1))) +main (void) +{ + unsigned int count = sizeof (data) / sizeof (data[0]) / 2; + + uint32_t in32[count]; + unsigned int out32[count]; + for (unsigned int i = 0; i < count; ++i) + { + in32[i] = data[i * 2]; + asm volatile ("" ::: "memory"); + } + clrsb_32 (out32, in32, count); + for (unsigned int i = 0; i < count; ++i) + if 
(out32[i] != data[i * 2 + 1]) + abort (); + + uint64_t in64[count]; + unsigned int out64[count]; + for (unsigned int i = 0; i < count; ++i) + { + in64[i] = (uint64_t) data[i * 2] << 32; + asm volatile ("" ::: "memory"); + } + clrsb_64 (out64, in64, count); + for (unsigned int i = 0; i < count; ++i) + if (out64[i] != (data[i * 2] ? data[i * 2 + 1] : 63)) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c new file mode 100644 index 0000000..0c7a4e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c @@ -0,0 +1,22 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +clz_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) +{ + for (int i = 0; i < size; ++i) + dst[i] = __builtin_clz (src[i]); +} + +void __attribute__ ((noinline, noclone)) +clz_64 (unsigned int *restrict dst, uint64_t *restrict src, int size) +{ + for (int i = 0; i < size; ++i) + dst[i] = __builtin_clzll (src[i]); +} + +/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c new file mode 100644 index 0000000..12d9cf2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c @@ -0,0 +1,50 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "clz_1.c" + +extern void abort (void) __attribute__ ((noreturn)); + +unsigned int data[] = { + 0xffffff80, 0, + 0xffffffff, 0, + 0x00000000, 32, + 0x80000000, 0, + 0x7fffffff, 1, + 0x000003ff, 22, + 0x1fffffff, 3, + 0x0000ffff, 16, + 0xffff0000, 0 +}; 
+ +int __attribute__ ((optimize (1))) +main (void) +{ + unsigned int count = sizeof (data) / sizeof (data[0]) / 2; + + uint32_t in32[count]; + unsigned int out32[count]; + for (unsigned int i = 0; i < count; ++i) + { + in32[i] = data[i * 2]; + asm volatile ("" ::: "memory"); + } + clz_32 (out32, in32, count); + for (unsigned int i = 0; i < count; ++i) + if (out32[i] != data[i * 2 + 1]) + abort (); + + uint64_t in64[count]; + unsigned int out64[count]; + for (unsigned int i = 0; i < count; ++i) + { + in64[i] = (uint64_t) data[i * 2] << 10; + asm volatile ("" ::: "memory"); + } + clz_64 (out64, in64, count); + for (unsigned int i = 0; i < count; ++i) + if (out64[i] != (data[i * 2] ? data[i * 2 + 1] + 22 : 64)) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/vect-clz.c b/gcc/testsuite/gcc.target/aarch64/vect-clz.c index 044fa9e..cd181c3 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-clz.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-clz.c @@ -1,6 +1,8 @@ /* { dg-do run } */ /* { dg-options "-O3 -save-temps -fno-inline -fno-vect-cost-model" } */ +#pragma GCC target "+nosve" + extern void abort (); void -- 2.7.4