From d62d8b771d3cc51b0b9e095011c7a1a6f83ce477 Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Wed, 3 Feb 2016 01:41:09 +0000 Subject: [PATCH] Codegen: [PPC] Fix PPCVSXFMAMutate to handle duplicates. The purpose of PPCVSXFMAMutate is to elide copies by changing FMA forms on PPC. %vreg6 = COPY %vreg96 %vreg6 = XSMADDASP %vreg6, %vreg5, %vreg7 ;v6 = v6 + v5 * v7 is replaced by %vreg5 = XSMADDMSP %vreg5, %vreg7, %vreg96 ;v5 = v5 * v7 + v96 This was broken in the case where the target register was also used as a multiplicand. Fix this case by checking for it and replacing both uses with the copied register. %vreg6 = COPY %vreg96 %vreg6 = XSMADDASP %vreg6, %vreg5, %vreg6 ;v6 = v6 + v5 * v6 is replaced by %vreg5 = XSMADDMSP %vreg5, %vreg96, %vreg96 ;v5 = v5 * v96 + v96 llvm-svn: 259617 --- llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 51 ++++++++++++++-------- .../CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll | 36 +++++++++++++++ 2 files changed, 68 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 6b19a2f..e90dc97 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -168,21 +168,32 @@ protected: if (OtherUsers || KillsAddendSrc) continue; - // Find one of the product operands that is killed by this instruction. + // The transformation doesn't work well with things like: + // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; + // unless vreg11 is also a kill, so skip when it is not, + // and check operand 3 to see if it is also a kill to handle the case: + // %vreg5 = A-form-op %vreg5, %vreg5, %vreg11; + // where vreg5 and vreg11 are both kills. This case would be skipped + // otherwise. + unsigned OldFMAReg = MI->getOperand(0).getReg(); + + // Find one of the product operands that is killed by this instruction. 
unsigned KilledProdOp = 0, OtherProdOp = 0; - if (LIS->getInterval(MI->getOperand(2).getReg()) - .Query(FMAIdx).isKill()) { + unsigned Reg2 = MI->getOperand(2).getReg(); + unsigned Reg3 = MI->getOperand(3).getReg(); + if (LIS->getInterval(Reg2).Query(FMAIdx).isKill() + && Reg2 != OldFMAReg) { KilledProdOp = 2; OtherProdOp = 3; - } else if (LIS->getInterval(MI->getOperand(3).getReg()) - .Query(FMAIdx).isKill()) { + } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill() + && Reg3 != OldFMAReg) { KilledProdOp = 3; OtherProdOp = 2; } - // If there are no killed product operands, then this transformation is - // likely not profitable. + // If there are no usable killed product operands, then this + // transformation is likely not profitable. if (!KilledProdOp) continue; @@ -212,14 +223,6 @@ protected: bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef(); bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef(); - unsigned OldFMAReg = MI->getOperand(0).getReg(); - - // The transformation doesn't work well with things like: - // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; - // so leave such things alone. - if (OldFMAReg == KilledProdReg) - continue; - // If there isn't a class that fits, we can't perform the transform. 
// This is needed for correctness with a mixture of VSX and Altivec // instructions to make sure that a low VSX register is not assigned to @@ -236,23 +239,33 @@ protected: MI->getOperand(0).setReg(KilledProdReg); MI->getOperand(1).setReg(KilledProdReg); MI->getOperand(3).setReg(AddendSrcReg); - MI->getOperand(2).setReg(OtherProdReg); MI->getOperand(0).setSubReg(KilledProdSubReg); MI->getOperand(1).setSubReg(KilledProdSubReg); MI->getOperand(3).setSubReg(AddSubReg); - MI->getOperand(2).setSubReg(OtherProdSubReg); MI->getOperand(1).setIsKill(KilledProdRegKill); MI->getOperand(3).setIsKill(AddRegKill); - MI->getOperand(2).setIsKill(OtherProdRegKill); MI->getOperand(1).setIsUndef(KilledProdRegUndef); MI->getOperand(3).setIsUndef(AddRegUndef); - MI->getOperand(2).setIsUndef(OtherProdRegUndef); MI->setDesc(TII->get(AltOpc)); + // If the addend is also a multiplicand, replace it with the addend + // source in both places. + if (OtherProdReg == AddendMI->getOperand(0).getReg()) { + MI->getOperand(2).setReg(AddendSrcReg); + MI->getOperand(2).setSubReg(AddSubReg); + MI->getOperand(2).setIsKill(AddRegKill); + MI->getOperand(2).setIsUndef(AddRegUndef); + } else { + MI->getOperand(2).setReg(OtherProdReg); + MI->getOperand(2).setSubReg(OtherProdSubReg); + MI->getOperand(2).setIsKill(OtherProdRegKill); + MI->getOperand(2).setIsUndef(OtherProdRegUndef); + } + DEBUG(dbgs() << " -> " << *MI); // The killed product operand was killed here, so we can reuse it now diff --git a/llvm/test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll b/llvm/test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll new file mode 100644 index 0000000..08d1b1b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll @@ -0,0 +1,36 @@ +; RUN: llc -fp-contract=fast -O2 < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-grtev4-linux-gnu" + +; CHECK-LABEL: f +; CHECK-NOT: xsmaddmsp [[REG:[0-9]+]], [[REG]], {{[0-9]+}} +define float @f(float %xf) #0 { + %1 = fmul 
float %xf, %xf + %2 = fmul float %1, 0x3F43FB0140000000 + %3 = fsub float 1.000000e+00, %2 + %4 = fmul float %1, %3 + %5 = fmul float %4, 0x3F461C5440000000 + %6 = fsub float 1.000000e+00, %5 + %7 = fmul float %1, %6 + %8 = fmul float %7, 0x3F4899C100000000 + %9 = fsub float 1.000000e+00, %8 + %10 = fmul float %1, %9 + %11 = fmul float %10, 0x3F4B894020000000 + %12 = fsub float 1.000000e+00, %11 + %13 = fmul float %1, %12 + %14 = fmul float %13, 0x3F4F07C200000000 + %15 = fsub float 1.000000e+00, %14 + %16 = fmul float %1, %15 + %17 = fmul float %16, 0x3F519E0120000000 + %18 = fsub float 1.000000e+00, %17 + %19 = fmul float %1, %18 + %20 = fmul float %19, 0x3F542D6620000000 + %21 = fsub float 1.000000e+00, %20 + %22 = fmul float %1, %21 + %23 = fmul float %22, 0x3F5756CAC0000000 + %24 = fsub float 1.000000e+00, %23 + %25 = fmul float %1, %24 + ret float %25 +} + +attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } -- 2.7.4