From e0e82856d535f56c916382f892ed2435dde54d4d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 20 Jul 2021 17:26:10 +0200 Subject: [PATCH] rs6000: Fix up easy_vector_constant_msb handling [PR101384] The following gcc.dg/pr101384.c testcase is miscompiled on powerpc64le-linux. easy_altivec_constant has code to try construct vector constants with different element sizes, perhaps different from CONST_VECTOR's mode. But as written, that works fine for vspltis[bhw] cases, but not for the vspltisw x,-1; vsl[bhw] x,x,x case, because that creates always a V16QImode, V8HImode or V4SImode constant containing broadcasted constant with just the MSB set. The vspltis_constant function etc. expects the vspltis[bhw] instructions where the small [-16..15] or even [-32..30] constant is sign-extended to the remaining step bytes, but that is not the case for the 0x80...00 constants, with step > 1 we can't handle e.g. { 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff } vectors but do want to handle e.g. { 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80 } and similarly with copies > 1 we do want to handle e.g. { 0x80808080, 0x80808080, 0x80808080, 0x80808080 }. 2021-07-20 Jakub Jelinek PR target/101384 * config/rs6000/rs6000-protos.h (easy_altivec_constant): Change return type from bool to int. * config/rs6000/rs6000.c (vspltis_constant): Fix up handling the EASY_VECTOR_MSB case if either step or copies is not 1. (vspltis_shifted): Fix comment typo. (easy_altivec_constant): Change return type from bool to int, instead of returning true return byte size of the element mode that should be used to synthetize the constant. * config/rs6000/predicates.md (easy_vector_constant_msb): Require that vspltis_shifted is 0, handle the case where easy_altivec_constant assumes using different vector mode from CONST_VECTOR's mode. * config/rs6000/altivec.md (easy_vector_constant_msb splitter): Use easy_altivec_constant to determine mode in which -1 >> -1 should be performed, use rs6000_expand_vector_init instead of gen_vec_initv4sisi. * gcc.dg/pr101384.c: New test. * gcc.target/powerpc/pr101384-1.c: New test. * gcc.target/powerpc/pr101384-2.c: New test. --- gcc/config/rs6000/altivec.md | 19 +++++-- gcc/config/rs6000/predicates.md | 17 +++++- gcc/config/rs6000/rs6000-protos.h | 2 +- gcc/config/rs6000/rs6000.c | 59 ++++++++++++++------ gcc/testsuite/gcc.dg/pr101384.c | 39 +++++++++++++ gcc/testsuite/gcc.target/powerpc/pr101384-1.c | 79 +++++++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/pr101384-2.c | 79 +++++++++++++++++++++++++++ 7 files changed, 268 insertions(+), 26 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr101384.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr101384-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr101384-2.c diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index a20d6ac..d70c17e 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -317,22 +317,33 @@ [(const_int 0)] { rtx dest = operands[0]; - machine_mode mode = GET_MODE (operands[0]); + machine_mode mode; rtvec v; int i, num_elements; - if (mode == V4SFmode) + switch (easy_altivec_constant (operands[1], mode)) { + case 1: + mode = V16QImode; + break; + case 2: + mode = V8HImode; + break; + case 4: mode = V4SImode; - dest = gen_lowpart (V4SImode, dest); + break; + default: + gcc_unreachable (); } + if (mode != mode) + dest = gen_lowpart (mode, dest); num_elements = GET_MODE_NUNITS (mode); v = rtvec_alloc (num_elements); for (i = 0; i < num_elements; i++) RTVEC_ELT (v, i) = constm1_rtx; - emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v))); + rs6000_expand_vector_init (dest, gen_rtx_PARALLEL (mode, v)); emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest))); DONE; }) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 121cbf1..956e42b 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -683,15 +683,26 @@ (define_predicate "easy_vector_constant_msb" (and (match_code "const_vector") (and (match_test "TARGET_ALTIVEC") - (match_test "easy_altivec_constant (op, mode)"))) + (match_test "easy_altivec_constant (op, mode)") + (match_test "vspltis_shifted (op) == 0"))) { HOST_WIDE_INT val; - int elt; + int elt, sz = easy_altivec_constant (op, mode); + machine_mode inner = GET_MODE_INNER (mode); + int isz = GET_MODE_SIZE (inner); if (mode == V2DImode || mode == V2DFmode) return 0; elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + if (isz < sz) + { + if (const_vector_elt_as_int (op, elt) != 0) + return 0; + elt += (BYTES_BIG_ENDIAN ? -1 : 1) * (sz - isz) / isz; + } + else if (isz > sz) + inner = smallest_int_mode_for_size (sz * BITS_PER_UNIT); val = const_vector_elt_as_int (op, elt); - return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); + return EASY_VECTOR_MSB (val, inner); }) ;; Return true if this is an easy altivec constant that we form diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 94bf961..14f6b31 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -30,7 +30,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, tree, machine_mode); #endif /* TREE_CODE */ -extern bool easy_altivec_constant (rtx, machine_mode); +extern int easy_altivec_constant (rtx, machine_mode); extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 779de95..279f00c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6134,6 +6134,27 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) splat_val = val; msb_val = val >= 0 ? 0 : -1; + if (val == 0 && step > 1) + { + /* Special case for loading most significant bit with step > 1. + In that case, match 0s in all but step-1s elements, where match + EASY_VECTOR_MSB. */ + for (i = 1; i < nunits; ++i) + { + unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i; + HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); + if ((i & (step - 1)) == step - 1) + { + if (!EASY_VECTOR_MSB (elt_val, inner)) + break; + } + else if (elt_val) + break; + } + if (i == nunits) + return true; + } + /* Construct the value to be splatted, if possible. If not, return 0. */ for (i = 2; i <= copies; i *= 2) { @@ -6146,6 +6167,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) | (small_val & mask))) return false; splat_val = small_val; + inner = smallest_int_mode_for_size (bitsize); } /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */ @@ -6160,8 +6182,9 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) ; /* Also check if are loading up the most significant bit which can be done by - loading up -1 and shifting the value left by -1. */ - else if (EASY_VECTOR_MSB (splat_val, inner)) + loading up -1 and shifting the value left by -1. Only do this for + step 1 here, for larger steps it is done earlier. */ + else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1) ; else @@ -6271,15 +6294,15 @@ vspltis_shifted (rtx op) } } - /* If all elements are equal, we don't need to do VLSDOI. */ + /* If all elements are equal, we don't need to do VSLDOI. */ return 0; } -/* Return true if OP is of the given MODE and can be synthesized - with a vspltisb, vspltish or vspltisw. */ +/* Return non-zero (element mode byte size) if OP is of the given MODE + and can be synthesized with a vspltisb, vspltish or vspltisw. */ -bool +int easy_altivec_constant (rtx op, machine_mode mode) { unsigned step, copies; @@ -6287,39 +6310,39 @@ easy_altivec_constant (rtx op, machine_mode mode) if (mode == VOIDmode) mode = GET_MODE (op); else if (mode != GET_MODE (op)) - return false; + return 0; /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy constants. */ if (mode == V2DFmode) - return zero_constant (op, mode); + return zero_constant (op, mode) ? 8 : 0; else if (mode == V2DImode) { if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0)) || !CONST_INT_P (CONST_VECTOR_ELT (op, 1))) - return false; + return 0; if (zero_constant (op, mode)) - return true; + return 8; if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) - return true; + return 8; - return false; + return 0; } /* V1TImode is a special container for TImode. Ignore for now. */ else if (mode == V1TImode) - return false; + return 0; /* Start with a vspltisw. */ step = GET_MODE_NUNITS (mode) / 4; copies = 1; if (vspltis_constant (op, step, copies)) - return true; + return 4; /* Then try with a vspltish. */ if (step == 1) @@ -6328,7 +6351,7 @@ easy_altivec_constant (rtx op, machine_mode mode) step >>= 1; if (vspltis_constant (op, step, copies)) - return true; + return 2; /* And finally a vspltisb. */ if (step == 1) @@ -6337,12 +6360,12 @@ easy_altivec_constant (rtx op, machine_mode mode) step >>= 1; if (vspltis_constant (op, step, copies)) - return true; + return 1; if (vspltis_shifted (op) != 0) - return true; + return GET_MODE_SIZE (GET_MODE_INNER (mode)); - return false; + return 0; } /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose diff --git a/gcc/testsuite/gcc.dg/pr101384.c b/gcc/testsuite/gcc.dg/pr101384.c new file mode 100644 index 0000000..7030c0a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101384.c @@ -0,0 +1,39 @@ +/* PR target/101384 */ +/* { dg-do run } */ +/* { dg-options "-O2 -Wno-psabi -w" } */ + +typedef unsigned char __attribute__((__vector_size__ (16))) U; +typedef unsigned short __attribute__((__vector_size__ (8 * sizeof (short)))) V; + +U u; +V v; + +__attribute__((noipa)) U +foo (void) +{ + U y = (U) { 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, + 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff } + u; + return y; +} + +__attribute__((noipa)) V +bar (void) +{ + V y = (V) { 0x8000, 0xffff, 0x8000, 0xffff, + 0x8000, 0xffff, 0x8000, 0xffff } + v; + return y; +} + +int +main () +{ + U x = foo (); + for (unsigned i = 0; i < 16; i++) + if (x[i] != ((i & 3) ? 0xff : 0x80)) + __builtin_abort (); + V y = bar (); + for (unsigned i = 0; i < 8; i++) + if (y[i] != ((i & 1) ? 0xffff : 0x8000)) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr101384-1.c b/gcc/testsuite/gcc.target/powerpc/pr101384-1.c new file mode 100644 index 0000000..627d7d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr101384-1.c @@ -0,0 +1,79 @@ +/* PR target/101384 */ +/* { dg-do compile { target le } } */ +/* { dg-options "-O2 -maltivec" } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */ + +typedef unsigned char __attribute__((__vector_size__ (16))) U; +typedef unsigned short __attribute__((__vector_size__ (16))) V; +typedef unsigned int __attribute__((__vector_size__ (16))) W; + +U u; +V v; +W w; + +U +f1 (void) +{ + U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u; + return y; +} + +U +f2 (void) +{ + U y = (U) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 } + u; + return y; +} + +U +f3 (void) +{ + U y = (U) { 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80 } + u; + return y; +} + +V +f4 (void) +{ + V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } + v; + return y; +} + +V +f5 (void) +{ + V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } + v; + return y; +} + +V +f6 (void) +{ + V y = (V) { 0, 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000 } + v; + return y; +} + +W +f7 (void) +{ + W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w; + return y; +} + +W +f8 (void) +{ + W y = (W) { 0x80008000, 0x80008000, 0x80008000, 0x80008000 } + w; + return y; +} + +W +f9 (void) +{ + W y = (W) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } + w; + return y; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr101384-2.c b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c new file mode 100644 index 0000000..f395708 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c @@ -0,0 +1,79 @@ +/* PR target/101384 */ +/* { dg-do compile { target be } } */ +/* { dg-options "-O2 -maltivec" } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */ + +typedef unsigned char __attribute__((__vector_size__ (16))) U; +typedef unsigned short __attribute__((__vector_size__ (16))) V; +typedef unsigned int __attribute__((__vector_size__ (16))) W; + +U u; +V v; +W w; + +U +f1 (void) +{ + U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u; + return y; +} + +U +f2 (void) +{ + U y = (U) { 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0 } + u; + return y; +} + +U +f3 (void) +{ + U y = (U) { 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0 } + u; + return y; +} + +V +f4 (void) +{ + V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } + v; + return y; +} + +V +f5 (void) +{ + V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } + v; + return y; +} + +V +f6 (void) +{ + V y = (V) { 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000, 0 } + v; + return y; +} + +W +f7 (void) +{ + W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w; + return y; +} + +W +f8 (void) +{ + W y = (W) { 0x80008000, 0x80008000, 0x80008000, 0x80008000 } + w; + return y; +} + +W +f9 (void) +{ + W y = (W) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } + w; + return y; +} -- 2.7.4