From 045153299658ed71708347f6cb0c14ccdcdf2de2 Mon Sep 17 00:00:00 2001 From: Olivier Sallenave Date: Wed, 7 Jan 2015 20:54:17 +0000 Subject: [PATCH] More FMA folding opportunities. llvm-svn: 225380 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 134 +++++++++++++++++++++++++- llvm/test/CodeGen/PowerPC/fma-assoc.ll | 79 +++++++++++++++ llvm/test/CodeGen/PowerPC/fma-ext.ll | 93 ++++++++++++++++++ 3 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/fma-assoc.ll create mode 100644 llvm/test/CodeGen/PowerPC/fma-ext.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 64cc1f5..c68b1e33 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6878,7 +6878,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } // enable-unsafe-fp-math - // FADD -> FMA combines: if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && TLI.isFMAFasterThanFMulAndFAdd(VT) && @@ -6896,6 +6895,50 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); + + // Remove FP_EXTEND when there is an opportunity to combine. This is + // legal here since extra precision is allowed. + + // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N00.getOperand(0), N00.getOperand(1), N1); + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma y, z, x) + // Note: Commutes FADD operands. 
+ if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N10.getOperand(0), N10.getOperand(1), N0); + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + + // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); } return SDValue(); @@ -6989,6 +7032,95 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { DAG.getNode(ISD::FNEG, dl, VT, N00), N01, DAG.getNode(ISD::FNEG, dl, VT, N1)); } + + // Remove FP_EXTEND when there is an opportunity to combine. This is + // legal here since extra precision is allowed. + + // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N00.getOperand(0), + N00.getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. 
+ if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N10.getOperand(0)), + N10.getOperand(1), + N0); + } + + // fold (fsub (fpext (fneg (fmul x, y))), z) + // -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + N000.getOperand(0)), + N000.getOperand(1), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul x, y))), z) + // -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + N000.getOperand(0)), + N000.getOperand(1), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // More folding opportunities when target permits. 
+ if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y, (fma u, v, (fneg z))) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } } return SDValue(); diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll new file mode 100644 index 0000000..dc1316e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll @@ -0,0 +1,79 @@ +; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s + +define double @test_FMADD_ASSOC1(double %A, double %B, double %C, + double %D, double %E) { + %F = fmul double %A, %B ; [#uses=1] + %G = fmul double %C, %D ; [#uses=1] + %H = fadd double %F, %G ; [#uses=1] + %I = fadd double %H, %E ; [#uses=1] + ret double %I +; CHECK-LABEL: test_FMADD_ASSOC1: +; CHECK: fmadd +; CHECK-NEXT: fmadd +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMADD_ASSOC1: +; CHECK-VSX: xsmaddmdp +; CHECK-VSX-NEXT: xsmaddadp +; CHECK-VSX-NEXT: fmr +; CHECK-VSX-NEXT: blr +} + +define double @test_FMADD_ASSOC2(double %A, double %B, double %C, + double %D, double %E) { + %F = fmul 
double %A, %B ; [#uses=1] + %G = fmul double %C, %D ; [#uses=1] + %H = fadd double %F, %G ; [#uses=1] + %I = fadd double %E, %H ; [#uses=1] + ret double %I +; CHECK-LABEL: test_FMADD_ASSOC2: +; CHECK: fmadd +; CHECK-NEXT: fmadd +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMADD_ASSOC2: +; CHECK-VSX: xsmaddmdp +; CHECK-VSX-NEXT: xsmaddadp +; CHECK-VSX-NEXT: fmr +; CHECK-VSX-NEXT: blr +} + +define double @test_FMSUB_ASSOC1(double %A, double %B, double %C, + double %D, double %E) { + %F = fmul double %A, %B ; [#uses=1] + %G = fmul double %C, %D ; [#uses=1] + %H = fadd double %F, %G ; [#uses=1] + %I = fsub double %H, %E ; [#uses=1] + ret double %I +; CHECK-LABEL: test_FMSUB_ASSOC1: +; CHECK: fmsub +; CHECK-NEXT: fmadd +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMSUB_ASSOC1: +; CHECK-VSX: xsmsubmdp +; CHECK-VSX-NEXT: xsmaddadp +; CHECK-VSX-NEXT: fmr +; CHECK-VSX-NEXT: blr +} + +define double @test_FMSUB_ASSOC2(double %A, double %B, double %C, + double %D, double %E) { + %F = fmul double %A, %B ; [#uses=1] + %G = fmul double %C, %D ; [#uses=1] + %H = fadd double %F, %G ; [#uses=1] + %I = fsub double %E, %H ; [#uses=1] + ret double %I +; CHECK-LABEL: test_FMSUB_ASSOC2: +; CHECK: fnmsub +; CHECK-NEXT: fnmsub +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMSUB_ASSOC2: +; CHECK-VSX: xsnmsubmdp +; CHECK-VSX-NEXT: xsnmsubadp +; CHECK-VSX-NEXT: fmr +; CHECK-VSX-NEXT: blr +} + diff --git a/llvm/test/CodeGen/PowerPC/fma-ext.ll b/llvm/test/CodeGen/PowerPC/fma-ext.ll new file mode 100644 index 0000000..56825ce --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fma-ext.ll @@ -0,0 +1,93 @@ +; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s + +define double @test_FMADD_EXT1(float %A, float %B, double %C) { + %D = fmul float %A, %B ; [#uses=1] + %E = fpext float %D to double ; [#uses=1] + %F = fadd double %E, %C ; [#uses=1] + ret 
double %F +; CHECK-LABEL: test_FMADD_EXT1: +; CHECK: fmadd +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMADD_EXT1: +; CHECK-VSX: xsmaddmdp +; CHECK-VSX-NEXT: blr +} + +define double @test_FMADD_EXT2(float %A, float %B, double %C) { + %D = fmul float %A, %B ; [#uses=1] + %E = fpext float %D to double ; [#uses=1] + %F = fadd double %C, %E ; [#uses=1] + ret double %F +; CHECK-LABEL: test_FMADD_EXT2: +; CHECK: fmadd +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMADD_EXT2: +; CHECK-VSX: xsmaddmdp +; CHECK-VSX-NEXT: blr +} + +define double @test_FMSUB_EXT1(float %A, float %B, double %C) { + %D = fmul float %A, %B ; [#uses=1] + %E = fpext float %D to double ; [#uses=1] + %F = fsub double %E, %C ; [#uses=1] + ret double %F +; CHECK-LABEL: test_FMSUB_EXT1: +; CHECK: fmsub +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMSUB_EXT1: +; CHECK-VSX: xsmsubmdp +; CHECK-VSX-NEXT: blr +} + +define double @test_FMSUB_EXT2(float %A, float %B, double %C) { + %D = fmul float %A, %B ; [#uses=1] + %E = fpext float %D to double ; [#uses=1] + %F = fsub double %C, %E ; [#uses=1] + ret double %F +; CHECK-LABEL: test_FMSUB_EXT2: +; CHECK: fnmsub +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMSUB_EXT2: +; CHECK-VSX: xsnmsubmdp +; CHECK-VSX-NEXT: fmr +; CHECK-VSX-NEXT: blr +} + +define double @test_FMSUB_EXT3(float %A, float %B, double %C) { + %D = fmul float %A, %B ; [#uses=1] + %E = fsub float -0.000000e+00, %D ; [#uses=1] + %F = fpext float %E to double ; [#uses=1] + %G = fsub double %F, %C ; [#uses=1] + ret double %G +; CHECK-LABEL: test_FMSUB_EXT3: +; CHECK: fneg +; CHECK-NEXT: fmsub +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMSUB_EXT3: +; CHECK-VSX: xsnegdp +; CHECK-VSX-NEXT: xsmsubmdp +; CHECK-VSX-NEXT: blr +} + +define double @test_FMSUB_EXT4(float %A, float %B, double %C) { + %D = fmul float %A, %B ; [#uses=1] + %E = fpext float %D to double ; [#uses=1] + %F = fsub double -0.000000e+00, %E ; [#uses=1] + %G = fsub double %F, %C ; [#uses=1] + ret double %G +; CHECK-LABEL: 
test_FMSUB_EXT4: +; CHECK: fneg +; CHECK-NEXT: fmsub +; CHECK-NEXT: blr + +; CHECK-VSX-LABEL: test_FMSUB_EXT4: +; CHECK-VSX: xsnegdp +; CHECK-VSX-NEXT: xsmsubmdp +; CHECK-VSX-NEXT: blr +} \ No newline at end of file -- 2.7.4