From 5efb9188442b06d7a91e92d1b7d94d39c57ffd95 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 6 Jan 2015 06:01:57 +0000 Subject: [PATCH] [PowerPC] Improve int_to_fp(fp_to_int(x)) combining The old target DAG combine that allowed for performing int_to_fp(fp_to_int(x)) without a load/store pair is updated here with support for unsigned integers, and to support single-precision values without a third rounding step, on newer cores with the appropriate instructions. llvm-svn: 225248 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 103 +++++++++++++++++++-------- llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll | 70 ++++++++++++++++++ 3 files changed, 144 insertions(+), 30 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 203a610..715283a6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -631,6 +631,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM) // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); + if (Subtarget.hasFPCVT()) + setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); @@ -8349,6 +8351,75 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, N->getOperand(0), ShiftCst), ShiftCst); } +SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, + DAGCombinerInfo &DCI) const { + assert((N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::UINT_TO_FP) && + "Need an int -> FP conversion node here"); + + if (!Subtarget.has64BitSupport()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + SDValue Op(N, 0); + + // Don't handle ppc_fp128 here or i1 conversions. + if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) + return SDValue(); + if (Op.getOperand(0).getValueType() == MVT::i1) + return SDValue(); + + // For i32 intermediate values, unfortunately, the conversion functions + // leave the upper 32 bits of the value are undefined. Within the set of + // scalar instructions, we have no method for zero- or sign-extending the + // value. Thus, we cannot handle i32 intermediate values here. + if (Op.getOperand(0).getValueType() == MVT::i32) + return SDValue(); + + assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && + "UINT_TO_FP is supported only with FPCVT"); + + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-precision and then round. + unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDUS : PPCISD::FCFIDS) : + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDU : PPCISD::FCFID); + MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + MVT::f32 : MVT::f64; + + // If we're converting from a float, to an int, and back to a float again, + // then we don't need the store/load pair at all. + if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && + Subtarget.hasFPCVT()) || + (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { + SDValue Src = Op.getOperand(0).getOperand(0); + if (Src.getValueType() == MVT::f32) { + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + DCI.AddToWorklist(Src.getNode()); + } + + unsigned FCTOp = + Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ; + + SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); + + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { + FP = DAG.getNode(ISD::FP_ROUND, dl, + MVT::f32, FP, DAG.getIntPtrConstant(0)); + DCI.AddToWorklist(FP.getNode()); + } + + return FP; + } + + return SDValue(); +} + // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for // builtins) into loads with swaps. SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, @@ -8483,36 +8554,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: - if (TM.getSubtarget().has64BitSupport()) { - if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { - // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. - // We allow the src/dst to be either f32/f64, but the intermediate - // type must be i64. - if (N->getOperand(0).getValueType() == MVT::i64 && - N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { - SDValue Val = N->getOperand(0).getOperand(0); - if (Val.getValueType() == MVT::f32) { - Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); - DCI.AddToWorklist(Val.getNode()); - } - - Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); - DCI.AddToWorklist(Val.getNode()); - Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); - DCI.AddToWorklist(Val.getNode()); - if (N->getValueType(0) == MVT::f32) { - Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, - DAG.getIntPtrConstant(0)); - DCI.AddToWorklist(Val.getNode()); - } - return Val; - } else if (N->getOperand(0).getValueType() == MVT::i32) { - // If the intermediate type is i32, we can avoid the load/store here - // too. - } - } - } - break; + case ISD::UINT_TO_FP: + return combineFPToIntToFP(N, DCI); case ISD::STORE: { // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). if (TM.getSubtarget().hasSTFIWX() && diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index dd2431e..ee946e6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -748,6 +748,7 @@ namespace llvm { SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, diff --git a/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll new file mode 100644 index 0000000..f56b9b3 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll @@ -0,0 +1,70 @@ +; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=FPCVT +; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=PPC64 +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind readnone +define float @fool(float %X) #0 { +entry: + %conv = fptosi float %X to i64 + %conv1 = sitofp i64 %conv to float + ret float %conv1 + +; FPCVT-LABEL: @fool +; FPCVT: fctidz [[REG1:[0-9]+]], 1 +; FPCVT: fcfids 1, [[REG1]] +; FPCVT: blr + +; PPC64-LABEL: @fool +; PPC64: fctidz [[REG1:[0-9]+]], 1 +; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]] +; PPC64: frsp 1, [[REG2]] +; PPC64: blr +} + +; Function Attrs: nounwind readnone +define double @foodl(double %X) #0 { +entry: + %conv = fptosi double %X to i64 + %conv1 = sitofp i64 %conv to double + ret double %conv1 + +; FPCVT-LABEL: @foodl +; FPCVT: fctidz [[REG1:[0-9]+]], 1 +; FPCVT: fcfid 1, [[REG1]] +; FPCVT: blr + +; PPC64-LABEL: @foodl +; PPC64: fctidz [[REG1:[0-9]+]], 1 +; PPC64: fcfid 1, [[REG1]] +; PPC64: blr +} + +; Function Attrs: nounwind readnone +define float @fooul(float %X) #0 { +entry: + %conv = fptoui float %X to i64 + %conv1 = uitofp i64 %conv to float + ret float %conv1 + +; FPCVT-LABEL: @fooul +; FPCVT: fctiduz [[REG1:[0-9]+]], 1 +; FPCVT: fcfidus 1, [[REG1]] +; FPCVT: blr +} + +; Function Attrs: nounwind readnone +define double @fooudl(double %X) #0 { +entry: + %conv = fptoui double %X to i64 + %conv1 = uitofp i64 %conv to double + ret double %conv1 + +; FPCVT-LABEL: @fooudl +; FPCVT: fctiduz [[REG1:[0-9]+]], 1 +; FPCVT: fcfidu 1, [[REG1]] +; FPCVT: blr +} + +attributes #0 = { nounwind readnone } + -- 2.7.4