From 1442e2031e0bc2d0a5bf88ef3c92c5410e044bab Mon Sep 17 00:00:00 2001
From: liuhongt
Date: Tue, 18 Oct 2022 16:58:52 +0800
Subject: [PATCH] Canonicalize vec_perm index to make the first index come
 from the first vector.

Fix unexpected non-canon form from gimple vector selector.

gcc/ChangeLog:

	PR target/107271
	* config/i386/i386-expand.cc (ix86_vec_perm_index_canon): New.
	(expand_vec_perm_shufps_shufps): Call ix86_vec_perm_index_canon

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr107271.c: New test.
---
 gcc/config/i386/i386-expand.cc           | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr107271.c | 16 ++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107271.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a0f8a98..70fd82b 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19604,6 +19604,22 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
   return false;
 }
 
+/* Canonicalize vec_perm index so that the first index always comes from
+   the first vector, remapping every index and swapping op0/op1 if needed.  */
+static void
+ix86_vec_perm_index_canon (struct expand_vec_perm_d *d)
+{
+  unsigned nelt = d->nelt;
+  if (d->perm[0] < nelt)
+    return;
+
+  for (unsigned i = 0; i != nelt; i++)
+    d->perm[i] = (d->perm[i] + nelt) % (2 * nelt);
+
+  std::swap (d->op0, d->op1);
+  return;
+}
+
 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
    in terms of a pair of shufps+ shufps/pshufd instructions. */
 static bool
@@ -19621,6 +19637,7 @@ expand_vec_perm_shufps_shufps (struct expand_vec_perm_d *d)
   if (d->testing_p)
     return true;
 
+  ix86_vec_perm_index_canon (d);
   for (i = 0; i < 4; ++i)
     count += d->perm[i] > 3 ? 1 : 0;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr107271.c b/gcc/testsuite/gcc.target/i386/pr107271.c
new file mode 100644
index 0000000..fe89c9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107271.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+typedef int __attribute__((__vector_size__ (16))) V;
+
+static inline __attribute__((__always_inline__)) V
+bar (V v128u32_0)
+{
+  return __builtin_shuffle ((V){}, v128u32_0, v128u32_0);
+}
+
+V
+foo (void)
+{
+  return bar ((V){7, 4, 4});
+}
-- 
2.7.4