"x<VSv>tsqrt<sd>p %0,%x1"
[(set_attr "type" "<VStype_simple>")])
-;; Fused vector multiply/add instructions. Support the classical Altivec
-;; versions of fma, which allows the target to be a separate register from the
-;; 3 inputs. Under VSX, the target must be either the addend or the first
-;; multiply.
-
+;; Fused vector multiply/add instructions. Do not generate the Altivec versions
+;; of fma (vmaddfp and vnmsubfp). These instructions allow the target to be a
+;; separate register from the 3 inputs, which can possibly save an extra move
+;; being generated (assuming all registers are AltiVec registers). However,
+;; vmaddfp and vnmsubfp can have different behaviors than the VSX instructions
+;; in some corner cases due to VSCR[NJ] being set or if the addend is +0.0
+;; instead of -0.0.
+;;
+;; Two alternatives remain. The first ties the addend (operand 3) to the
+;; destination and emits xvmaddasp; the second ties multiplicand operand 2 to
+;; the destination and emits xvmaddmsp.
(define_insn "*vsx_fmav4sf4"
- [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
(fma:V4SF
- (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
- (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
- (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
+ (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
+ (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
"VECTOR_UNIT_VSX_P (V4SFmode)"
"@
xvmaddasp %x0,%x1,%x2
- xvmaddmsp %x0,%x1,%x3
- vmaddfp %0,%1,%2,%3"
+ xvmaddmsp %x0,%x1,%x3"
[(set_attr "type" "vecfloat")])
(define_insn "*vsx_fmav2df4"
[(set_attr "type" "<VStype_mul>")])
+;; Negated fused multiply-subtract for V4SF: -((op1 * op2) - op3). Only the
+;; VSX forms are emitted: xvnmsubasp when operand 3 is tied to the destination
+;; and xvnmsubmsp when operand 2 is tied to it. The AltiVec vnmsubfp
+;; alternative (constraint "v") is no longer offered.
(define_insn "*vsx_nfmsv4sf4"
- [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
(neg:V4SF
(fma:V4SF
- (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
- (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
+ (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
(neg:V4SF
- (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
+ (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
"VECTOR_UNIT_VSX_P (V4SFmode)"
"@
xvnmsubasp %x0,%x1,%x2
- xvnmsubmsp %x0,%x1,%x3
- vnmsubfp %0,%1,%2,%3"
+ xvnmsubmsp %x0,%x1,%x3"
[(set_attr "type" "vecfloat")])
(define_insn "*vsx_nfmsv2df4"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* PR 70423, Make sure we don't generate vmaddfp or vnmsubfp. These
+ instructions have different rounding modes than the VSX instructions
+ xvmaddsp and xvnmsubsp. These tests are written where the 3 inputs and
+ target are all separate registers. Because vmaddfp and vnmsubfp are no
+ longer generated the compiler will have to generate an xvmaddsp or xvnmsubsp
+ instruction (the "a" or "m" form) together with a move operation. */
+
+#include <altivec.h>
+
+/* Multiply-add written with plain C operators: (a * b) + c.
+   DUMMY is never read; presumably it takes the first vector argument
+   register so the result register differs from A, B and C -- TODO confirm
+   against the ABI. */
+vector float
+do_add1 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return (a * b) + c;
+}
+
+/* Negated multiply-subtract written with plain C operators:
+   -((a * b) - c). DUMMY is never read; presumably it keeps the result
+   register distinct from the three inputs -- TODO confirm against the ABI. */
+vector float
+do_nsub1 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return -((a * b) - c);
+}
+
+/* Same multiply-add as do_add1, but requested through the vec_madd
+   intrinsic from <altivec.h> instead of C operators. DUMMY is never read. */
+vector float
+do_add2 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return vec_madd (a, b, c);
+}
+
+/* Same negated multiply-subtract as do_nsub1, but requested through the
+   vec_nmsub intrinsic from <altivec.h> instead of C operators. DUMMY is
+   never read. */
+vector float
+do_nsub2 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return vec_nmsub (a, b, c);
+}
+
+/* { dg-final { scan-assembler {\mxvmadd[am]sp\M} } } */
+/* { dg-final { scan-assembler {\mxvnmsub[am]sp\M} } } */
+/* { dg-final { scan-assembler-not {\mvmaddfp\M} } } */
+/* { dg-final { scan-assembler-not {\mvnmsubfp\M} } } */