x86: Properly handle USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS
author H.J. Lu <hjl.tools@gmail.com>
Wed, 15 Sep 2021 06:17:58 +0000 (14:17 +0800)
committer liuhongt <hongtao.liu@intel.com>
Fri, 17 Sep 2021 08:17:57 +0000 (16:17 +0800)
Check TARGET_USE_VECTOR_FP_CONVERTS or TARGET_USE_VECTOR_CONVERTS when
handling avx_partial_xmm_update attribute.  Don't convert AVX partial
XMM register update if vector packed SSE conversion should be used.

gcc/

PR target/101900
* config/i386/i386-features.c (remove_partial_avx_dependency):
Check TARGET_USE_VECTOR_FP_CONVERTS and TARGET_USE_VECTOR_CONVERTS
before generating vxorps.

gcc/testsuite

PR target/101900
* gcc.target/i386/pr101900-1.c: New test.
* gcc.target/i386/pr101900-2.c: Likewise.
* gcc.target/i386/pr101900-3.c: Likewise.

gcc/config/i386/i386-features.c
gcc/testsuite/gcc.target/i386/pr101900-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr101900-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr101900-3.c [new file with mode: 0644]

diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
index 5a99ea7..a65f601 100644 (file)
@@ -2210,15 +2210,32 @@ remove_partial_avx_dependency (void)
              != AVX_PARTIAL_XMM_UPDATE_TRUE)
            continue;
 
-         if (!v4sf_const0)
-           v4sf_const0 = gen_reg_rtx (V4SFmode);
-
          /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
             SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
             vec_merge with subreg.  */
          rtx src = SET_SRC (set);
          rtx dest = SET_DEST (set);
          machine_mode dest_mode = GET_MODE (dest);
+         machine_mode src_mode = GET_MODE (XEXP (src, 0));
+
+         switch (src_mode)
+           {
+           case E_SFmode:
+           case E_DFmode:
+             if (TARGET_USE_VECTOR_FP_CONVERTS)
+               continue;
+             break;
+           case E_SImode:
+           case E_DImode:
+             if (TARGET_USE_VECTOR_CONVERTS)
+               continue;
+             break;
+           default:
+             break;
+           }
+
+         if (!v4sf_const0)
+           v4sf_const0 = gen_reg_rtx (V4SFmode);
 
          rtx zero;
          machine_mode dest_vecmode;
diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c
new file mode 100644 (file)
index 0000000..0a45f8e
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+  d = f;  /* float -> double: dg-final expects vcvtps2pd, no vcvtss2sd.  */
+  f = i;  /* int -> float: dg-final expects the scalar vcvtsi2ssl.  */
+}
+
+/* { dg-final { scan-assembler "vcvtps2pd" } } */
+/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c
new file mode 100644 (file)
index 0000000..c8b2d1d
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+  d = f;  /* float -> double: dg-final expects the scalar vcvtss2sd.  */
+  f = i;  /* int -> float: dg-final expects vcvtdq2ps, no vcvtsi2ssl.  */
+}
+
+/* { dg-final { scan-assembler "vcvtss2sd" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c
new file mode 100644 (file)
index 0000000..6ee565b
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+  d = f;  /* float -> double: dg-final expects vcvtps2pd, no vcvtss2sd.  */
+  f = i;  /* int -> float: dg-final expects vcvtdq2ps, no vcvtsi2ssl.  */
+}
+
+/* { dg-final { scan-assembler "vcvtps2pd" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
+/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "vxorps" } } */