From 877c9e332f9b2b6eacd6ed4edf5790ee0f41a68f Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 5 Jan 2022 20:06:03 +0100
Subject: [PATCH] i386: Fix expand_vec_perm_pshufb for narrow modes [PR103905]
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

2022-01-05  Uroš Bizjak

gcc/ChangeLog:

	PR target/103905
	* config/i386/i386-expand.c (expand_vec_perm_pshufb): Fix number of
	narrow mode remapped elements for !one_operand_p case.

gcc/testsuite/ChangeLog:

	PR target/103905
	* gcc.target/i386/pr103905.c: New test.
---
 gcc/config/i386/i386-expand.c            | 23 ++++++++++++-----------
 gcc/testsuite/gcc.target/i386/pr103905.c | 25 +++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103905.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index e93ef1c..9bd8e53 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -18730,7 +18730,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 {
   unsigned i, nelt, eltsz, mask;
   unsigned char perm[64];
-  machine_mode vmode = V16QImode;
+  machine_mode vmode;
   struct expand_vec_perm_d nd;
   rtx rperm[64], vperm, target, op0, op1;
 
@@ -18754,6 +18754,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	case 16:
 	  if (!TARGET_XOP)
 	    return false;
+	  vmode = V16QImode;
 	  break;
 
 	case 32:
@@ -18803,6 +18804,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	case 16:
 	  if (!TARGET_SSSE3)
 	    return false;
+	  vmode = V16QImode;
 	  break;
 
 	case 32:
@@ -18894,6 +18896,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	      /* Or if vpermps can be used.  */
 	      else if (d->vmode == V16SFmode)
 		vmode = V16SImode;
+
 	      if (vmode == V64QImode)
 		{
 		  /* vpshufb only works intra lanes, it is not
@@ -18946,8 +18949,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 
   machine_mode vpmode = vmode;
 
-  if (vmode == V4QImode
-      || vmode == V8QImode)
+  nelt = GET_MODE_SIZE (vmode);
+
+  /* Emulate narrow modes with V16QI instructions.  */
+  if (nelt < 16)
     {
       rtx m128 = GEN_INT (-128);
 
@@ -18955,19 +18960,15 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	 account for inactive top elements from the first operand.  */
       if (!d->one_operand_p)
 	{
-	  int sz = GET_MODE_SIZE (vmode);
-
 	  for (i = 0; i < nelt; ++i)
 	    {
-	      int ival = INTVAL (rperm[i]);
-	      if (ival >= sz)
-		ival += 16-sz;
-	      rperm[i] = GEN_INT (ival);
+	      unsigned ival = UINTVAL (rperm[i]);
+	      if (ival >= nelt)
+		rperm[i] = GEN_INT (ival + 16 - nelt);
 	    }
 	}
 
-      /* V4QI/V8QI is emulated with V16QI instruction, fill inactive
-	 elements in the top positions with zeros.  */
+      /* Fill inactive elements in the top positions with zeros.  */
       for (i = nelt; i < 16; ++i)
 	rperm[i] = m128;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr103905.c b/gcc/testsuite/gcc.target/i386/pr103905.c
new file mode 100644
index 0000000..aef9c4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103905.c
@@ -0,0 +1,25 @@
+/* PR target/103905 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O3 -mxop" } */
+
+#include "xop-check.h"
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+  for (int i = 0; i < n; ++i)
+    perm[i] = i;
+}
+
+static void
+xop_test (void)
+{
+  foo (8);
+
+  if (perm[7] != 7)
+    __builtin_abort ();
+}
-- 
2.7.4