i386: Fix wrong codegen for V8HF move without TARGET_AVX512F
author Hongyu Wang <hongyu.wang@intel.com>
Wed, 20 Oct 2021 05:13:39 +0000 (13:13 +0800)
committer Hongyu Wang <hongyu.wang@intel.com>
Thu, 21 Oct 2021 08:58:28 +0000 (16:58 +0800)
Since the _Float16 type is enabled for SSE2 targets, returning a
V8HFmode vector without AVX512F would incorrectly emit a
vmovdqa64 instruction. Adjust ix86_get_ssemov to avoid this.

gcc/ChangeLog:
PR target/102812
* config/i386/i386.c (ix86_get_ssemov): Adjust HFmode vector
move to use the same logic as HImode.

gcc/testsuite/ChangeLog:
PR target/102812
* gcc.target/i386/pr102812.c: New test.

gcc/config/i386/i386.c
gcc/testsuite/gcc.target/i386/pr102812.c [new file with mode: 0644]

index e3988f8..299e1ab 100644 (file)
@@ -5399,9 +5399,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
       switch (scalar_mode)
        {
        case E_HFmode:
-         opcode = (misaligned_p
-                   ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
-                   : "vmovdqa64");
+         if (evex_reg_p)
+           opcode = (misaligned_p
+                     ? (TARGET_AVX512BW
+                        ? "vmovdqu16"
+                        : "vmovdqu64")
+                     : "vmovdqa64");
+         else
+           opcode = (misaligned_p
+                     ? (TARGET_AVX512BW
+                        ? "vmovdqu16"
+                        : "%vmovdqu")
+                     : "%vmovdqa");
          break;
        case E_SFmode:
          opcode = misaligned_p ? "%vmovups" : "%vmovaps";
diff --git a/gcc/testsuite/gcc.target/i386/pr102812.c b/gcc/testsuite/gcc.target/i386/pr102812.c
new file mode 100644 (file)
index 0000000..bad4fa9
--- /dev/null
@@ -0,0 +1,12 @@
+/* PR target/102812 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4 -mno-avx" } */
+/* { dg-final { scan-assembler-not "vmovdqa64\t" } } */
+/* { dg-final { scan-assembler "movdqa\t" } } */
+
+typedef _Float16 v8hf __attribute__((__vector_size__ (16)));
+
+v8hf t (_Float16 a)
+{
+    return (v8hf) {a, 0, 0, 0, 0, 0, 0, 0};
+}