From b23c6a2c604c0dccf14cc3461357a28ffb99ed88 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 19 Dec 2019 14:52:46 +0000 Subject: [PATCH] [AArch64] Fix handling of npatterns>1 constants for partial SVE modes For partial SVE vectors of element X, we want to treat duplicates of single X elements in the same way as for full vectors of X. But if a constant instead contains a repeating pattern of X elements, the transition from one value to the next must happen at container boundaries rather than element boundaries. E.g. a VNx4HI should in that case contain the same number of constants as a VNx4SI. Fixing this means that we need a reinterpret from the container-based mode to the partial mode; e.g. in the above example we need a reinterpret from VNx4SI to VNx4HI. We can't use subregs for that because they're forbidden by aarch64_can_change_mode_class; we should handle them in the same way as for big-endian instead. 2019-12-19 Richard Sandiford gcc/ * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): When handling partial SVE vectors, use the container mode rather than the element mode if the constant isn't a single-element duplicate. * config/aarch64/aarch64-sve.md (@aarch64_sve_reinterpret): Check targetm.can_change_mode_class instead of BYTES_BIG_ENDIAN. gcc/testsuite/ * gcc.target/aarch64/sve/mixed_size_9.c: New test. 
From-SVN: r279580 --- gcc/ChangeLog | 8 ++++++++ gcc/config/aarch64/aarch64-sve.md | 3 ++- gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++--- gcc/testsuite/ChangeLog | 4 ++++ .../gcc.target/aarch64/sve/mixed_size_9.c | 18 ++++++++++++++++++ 5 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mixed_size_9.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9126ec1..308774d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-12-19 Richard Sandiford + + * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): When + handling partial SVE vectors, use the container mode rather than + the element mode if the constant isn't a single-element duplicate. + * config/aarch64/aarch64-sve.md (@aarch64_sve_reinterpret): + Check targetm.can_change_mode_class instead of BYTES_BIG_ENDIAN. + 2019-12-19 Andrew Stubbs * config/gcn/gcn-valu.md (addv64si3): Rename to ... diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 1d9cdad..feb7559 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -694,7 +694,8 @@ UNSPEC_REINTERPRET))] "TARGET_SVE" { - if (!BYTES_BIG_ENDIAN) + machine_mode src_mode = GET_MODE (operands[1]); + if (targetm.can_change_mode_class (mode, src_mode, FP_REGS)) { emit_move_insn (operands[0], gen_lowpart (mode, operands[1])); DONE; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 88baf96..a85f8b0 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -16826,12 +16826,28 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, } } - unsigned int elt_size = GET_MODE_SIZE (elt_mode); + /* If all elements in an SVE vector have the same value, we have a free + choice between using the element mode and using the container mode. 
+ Using the element mode means that unused parts of the vector are + duplicates of the used elements, while using the container mode means + that the unused parts are an extension of the used elements. Using the + element mode is better for (say) VNx4HI 0x101, since 0x01010101 is valid + for its container mode VNx4SI while 0x00000101 isn't. + + If not all elements in an SVE vector have the same value, we need the + transition from one element to the next to occur at container boundaries. + E.g. a fixed-length VNx4HI containing { 1, 2, 3, 4 } should be treated + in the same way as a VNx4SI containing { 1, 2, 3, 4 }. */ + scalar_int_mode elt_int_mode; + if ((vec_flags & VEC_SVE_DATA) && n_elts > 1) + elt_int_mode = aarch64_sve_container_int_mode (mode); + else + elt_int_mode = int_mode_for_mode (elt_mode).require (); + + unsigned int elt_size = GET_MODE_SIZE (elt_int_mode); if (elt_size > 8) return false; - scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require (); - /* Expand the vector constant out into a byte vector, with the least significant byte of the register first. */ auto_vec bytes; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7d47488..bbca1aa 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2019-12-19 Richard Sandiford + * gcc.target/aarch64/sve/mixed_size_9.c: New test. + +2019-12-19 Richard Sandiford + * gcc.target/aarch64/sve/mixed_size_8.c: New test. 2019-12-19 Richard Sandiford diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_9.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_9.c new file mode 100644 index 0000000..5f78150 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_9.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -msve-vector-bits=256" } */ +/* Originally from gcc.dg/vect/pr88598-4.c. 
*/ + +#define N 4 + +int a[N]; + +int __attribute__ ((noipa)) +f2 (void) +{ + int b[N] = { 0, 31, 0, 31 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] & b[i]; + return res; +} + +/* { dg-final { scan-assembler-not {\tmov\tz[0-9]\.d, #} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9],} } } */ -- 2.7.4