Do not generate vmaddfp and vnmsubfp

author Michael Meissner <meissner@linux.ibm.com>

Mon, 10 Apr 2023 03:32:27 +0000 (23:32 -0400)

committer Michael Meissner <meissner@linux.ibm.com>

Mon, 10 Apr 2023 03:34:08 +0000 (23:34 -0400)
author Michael Meissner <meissner@linux.ibm.com>
Mon, 10 Apr 2023 03:32:27 +0000 (23:32 -0400)
committer Michael Meissner <meissner@linux.ibm.com>
Mon, 10 Apr 2023 03:34:08 +0000 (23:34 -0400)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md

index 0865608..806ee43 100644 (file)
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2009,22 +2009,23 @@
    "x<VSv>tsqrt<sd>p %0,%x1"
    [(set_attr "type" "<VStype_simple>")])
  
-;; Fused vector multiply/add instructions. Support the classical Altivec
-;; versions of fma, which allows the target to be a separate register from the
-;; 3 inputs.  Under VSX, the target must be either the addend or the first
-;; multiply.
-
+;; Fused vector multiply/add instructions. Do not generate the Altivec versions
+;; of fma (vmaddfp and vnmsubfp).  These instructions allow the target to be a
+;; separate register from the 3 inputs, which can possibly save an extra move
+;; being generated (assuming all registers are AltiVec registers).  However,
+;; vmaddfp and vnmsubfp can have different behaviors than the VSX instructions
+;; in some corner cases due to VSCR[NJ] being set or if the addend is +0.0
+;; instead of -0.0.
  (define_insn "*vsx_fmav4sf4"
-  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
         (fma:V4SF
-         (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
-         (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
-         (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
+         (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
+         (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
+         (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
    "VECTOR_UNIT_VSX_P (V4SFmode)"
    "@
     xvmaddasp %x0,%x1,%x2
-   xvmaddmsp %x0,%x1,%x3
-   vmaddfp %0,%1,%2,%3"
+   xvmaddmsp %x0,%x1,%x3"
    [(set_attr "type" "vecfloat")])
  
  (define_insn "*vsx_fmav2df4"
@@ -2066,18 +2067,17 @@
    [(set_attr "type" "<VStype_mul>")])
  
  (define_insn "*vsx_nfmsv4sf4"
-  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
         (neg:V4SF
          (fma:V4SF
-          (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
-          (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
+          (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
+          (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
            (neg:V4SF
-            (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
+            (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
    "VECTOR_UNIT_VSX_P (V4SFmode)"
    "@
     xvnmsubasp %x0,%x1,%x2
-   xvnmsubmsp %x0,%x1,%x3
-   vnmsubfp %0,%1,%2,%3"
+   xvnmsubmsp %x0,%x1,%x3"
    [(set_attr "type" "vecfloat")])
  
  (define_insn "*vsx_nfmsv2df4"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr70243.c b/gcc/testsuite/gcc.target/powerpc/pr70243.c

new file mode 100644 (file)

index 0000000..18a5ce7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr70243.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* PR 70243, Make sure we don't generate vmaddfp or vnmsubfp.  These
+   instructions have different rounding modes than the VSX instructions
+   xvmaddsp and xvnmsubsp.  These tests are written where the 3 inputs and
+   target are all separate registers.  Because vmaddfp and vnmsubfp are no
+   longer generated the compiler will have to generate an xvmaddsp or xvnmsubsp
+   instruction followed by a move operation.  */
+
+#include <altivec.h>
+
+vector float
+do_add1 (vector float dummy, vector float a, vector float b, vector float c)
+{
+  return (a * b) + c;
+}
+
+vector float
+do_nsub1 (vector float dummy, vector float a, vector float b, vector float c)
+{
+  return -((a * b) - c);
+}
+
+vector float
+do_add2 (vector float dummy, vector float a, vector float b, vector float c)
+{
+  return vec_madd (a, b, c);
+}
+
+vector float
+do_nsub2 (vector float dummy, vector float a, vector float b, vector float c)
+{
+  return vec_nmsub (a, b, c);
+}
+
+/* { dg-final { scan-assembler     {\mxvmadd[am]sp\M}  } } */
+/* { dg-final { scan-assembler     {\mxvnmsub[am]sp\M} } } */
+/* { dg-final { scan-assembler-not {\mvmaddfp\M}       } } */
+/* { dg-final { scan-assembler-not {\mvnmsubfp\M}      } } */
author	Michael Meissner <meissner@linux.ibm.com>
	Mon, 10 Apr 2023 03:32:27 +0000 (23:32 -0400)
committer	Michael Meissner <meissner@linux.ibm.com>
	Mon, 10 Apr 2023 03:34:08 +0000 (23:34 -0400)
gcc/config/rs6000/vsx.md		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/pr70243.c	[new file with mode: 0644]	patch \| blob