From 08e69332881f8d28ce8b559ffba1900ae5c0d5ee Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 27 Mar 2022 11:07:39 -0700
Subject: [PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3

Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
"Yv" register constraint with the "Yw" register constraint.

gcc/

	PR target/105068
	* config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
	"Yw".

gcc/testsuite/

	PR target/105068
	* gcc.target/i386/pr105068.c: New test.
---
 gcc/config/i386/sse.md                   |  6 ++--
 gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 33bd2c4..58d2bd9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20758,9 +20758,9 @@
 })
 
 (define_insn_and_split "*ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
-	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
-		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
+	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
 		      (match_operand:V4SI 4 "reg_or_const_vector_operand"
 					  "i,3,3")]
 		     UNSPEC_PSHUFB))
diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
new file mode 100644
index 0000000..e5fb033
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105068.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
+
+typedef char __attribute__((__vector_size__(8))) C;	/* 8 bytes of char elements.  */
+typedef int __attribute__((__vector_size__(8))) U;	/* 8 bytes of int elements.  */
+typedef int __attribute__((__vector_size__(16))) V;	/* 16 bytes of int elements.  */
+typedef int __attribute__((__vector_size__(32))) W;	/* 32 bytes of int elements.  */
+typedef long long __attribute__((__vector_size__(64))) L;	/* 64 bytes of long long elements.  */
+typedef _Float64 __attribute__((__vector_size__(16))) F;	/* 16 bytes of _Float64 elements.  */
+typedef _Float64 __attribute__((__vector_size__(64))) G;	/* 64 bytes of _Float64 elements.  */
+C c;	/* Global operand passed twice to the pshufb builtin in foo0.  */
+int i;	/* Global scalar added into the 16-byte vector sum in foo0.  */
+
+U foo0( W v256u32_0,	/* Mixes many vector ops and returns an 8-byte int vector; every v256* parameter is declared W and every v128* parameter V, whatever its name suggests.  */
+           W v256s32_0,
+           V v128u64_0,
+           V v128s64_0,
+           W v256u64_0,
+           W v256s64_0,
+           L v512s64_0,
+           W v256u128_0,
+           W v256s128_0,
+           V v128f32_0,
+           W v256f32_0,
+           F F_0,
+           W v256f64_0,
+           G G_0) {
+  C U_1 = __builtin_ia32_pshufb(c, c);	/* 8-byte pshufb of the global c with itself; presumably the operation that matches *ssse3_pshufbv8qi3 -- confirm against sse.md.  */
+  G_0 += __builtin_convertvector(v512s64_0, G);	/* Convert the long long vector to _Float64 elements and accumulate into G_0.  */
+  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);	/* Both result elements are selected with index 2.  */
+  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +	/* Sum of all eight W-typed parameters.  */
+                    v256s128_0 + v256f32_0 + v256f64_0;
+  V V_r = ((union {	/* Cast-to-union: read W_r's storage back through the smaller V member.  */
+                      W a;
+                      V b;
+                    })W_r)
+                        .b +
+                    i + v128u64_0 + v128s64_0 + v128f32_0 +	/* The scalar global i takes part in the vector addition.  */
+                    (V)F_1;	/* F_1 is cast from a _Float64 vector to the same-sized int vector type.  */
+  U U_r = ((union {	/* Same union trick, narrowing V_r to the 8-byte U member.  */
+                    V a;
+                    U b;
+                  })V_r)
+                      .b +
+                  (U)U_1;	/* The pshufb result is cast to U and added in, so it feeds the return value.  */
+  return U_r;
+}
-- 
2.7.4