From 5efb9188442b06d7a91e92d1b7d94d39c57ffd95 Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Tue, 6 Jan 2015 06:01:57 +0000
Subject: [PATCH] [PowerPC] Improve int_to_fp(fp_to_int(x)) combining

The old target DAG combine that allowed for performing int_to_fp(fp_to_int(x))
without a load/store pair is updated here with support for unsigned integers,
and to support single-precision values without a third rounding step, on newer
cores with the appropriate instructions.

llvm-svn: 225248
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp  | 103 +++++++++++++++++++--------
 llvm/lib/Target/PowerPC/PPCISelLowering.h    |   1 +
 llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll |  70 ++++++++++++++++++
 3 files changed, 144 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 203a610..715283a6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -631,6 +631,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::SINT_TO_FP);
+  if (Subtarget.hasFPCVT())
+    setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine(ISD::LOAD);
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::BR_CC);
@@ -8349,6 +8351,75 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                  N->getOperand(0), ShiftCst), ShiftCst);
 }
 
+SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
+                                              DAGCombinerInfo &DCI) const {
+  assert((N->getOpcode() == ISD::SINT_TO_FP ||
+          N->getOpcode() == ISD::UINT_TO_FP) &&
+         "Need an int -> FP conversion node here");
+
+  if (!Subtarget.has64BitSupport())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  SDValue Op(N, 0);
+
+  // Don't handle ppc_fp128 here or i1 conversions.
+  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+    return SDValue();
+  if (Op.getOperand(0).getValueType() == MVT::i1)
+    return SDValue();
+
+  // For i32 intermediate values, unfortunately, the conversion functions
+  // leave the upper 32 bits of the value are undefined. Within the set of
+  // scalar instructions, we have no method for zero- or sign-extending the
+  // value. Thus, we cannot handle i32 intermediate values here.
+  if (Op.getOperand(0).getValueType() == MVT::i32)
+    return SDValue();
+
+  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+         "UINT_TO_FP is supported only with FPCVT");
+
+  // If we have FCFIDS, then use it when converting to single-precision.
+  // Otherwise, convert to double-precision and then round.
+  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+                   (Op.getOpcode() == ISD::UINT_TO_FP ?
+                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+                   (Op.getOpcode() == ISD::UINT_TO_FP ?
+                    PPCISD::FCFIDU : PPCISD::FCFID);
+  MVT      FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+                   MVT::f32 : MVT::f64;
+
+  // If we're converting from a float, to an int, and back to a float again,
+  // then we don't need the store/load pair at all.
+  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
+       Subtarget.hasFPCVT()) ||
+      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
+    SDValue Src = Op.getOperand(0).getOperand(0);
+    if (Src.getValueType() == MVT::f32) {
+      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+      DCI.AddToWorklist(Src.getNode());
+    }
+
+    unsigned FCTOp =
+      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+                                                        PPCISD::FCTIDUZ;
+
+    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
+    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
+
+    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+      FP = DAG.getNode(ISD::FP_ROUND, dl,
+                       MVT::f32, FP, DAG.getIntPtrConstant(0));
+      DCI.AddToWorklist(FP.getNode());
+    }
+
+    return FP;
+  }
+
+  return SDValue();
+}
+
 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
 // builtins) into loads with swaps.
 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
@@ -8483,36 +8554,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SELECT_CC:
     return DAGCombineTruncBoolExt(N, DCI);
   case ISD::SINT_TO_FP:
-    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
-      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
-        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
-        // We allow the src/dst to be either f32/f64, but the intermediate
-        // type must be i64.
-        if (N->getOperand(0).getValueType() == MVT::i64 &&
-            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
-          SDValue Val = N->getOperand(0).getOperand(0);
-          if (Val.getValueType() == MVT::f32) {
-            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
-            DCI.AddToWorklist(Val.getNode());
-          }
-
-          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
-          DCI.AddToWorklist(Val.getNode());
-          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
-          DCI.AddToWorklist(Val.getNode());
-          if (N->getValueType(0) == MVT::f32) {
-            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
-                              DAG.getIntPtrConstant(0));
-            DCI.AddToWorklist(Val.getNode());
-          }
-          return Val;
-        } else if (N->getOperand(0).getValueType() == MVT::i32) {
-          // If the intermediate type is i32, we can avoid the load/store here
-          // too.
-        }
-      }
-    }
-    break;
+  case ISD::UINT_TO_FP:
+    return combineFPToIntToFP(N, DCI);
   case ISD::STORE: {
     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
     if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index dd2431e..ee946e6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -748,6 +748,7 @@ namespace llvm {
 
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
 
     SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                              unsigned &RefinementSteps,
diff --git a/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
new file mode 100644
index 0000000..f56b9b3
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=FPCVT
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=PPC64
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define float @fool(float %X) #0 {
+entry:
+  %conv = fptosi float %X to i64
+  %conv1 = sitofp i64 %conv to float
+  ret float %conv1
+
+; FPCVT-LABEL: @fool
+; FPCVT: fctidz [[REG1:[0-9]+]], 1
+; FPCVT: fcfids 1, [[REG1]]
+; FPCVT: blr
+
+; PPC64-LABEL: @fool
+; PPC64: fctidz [[REG1:[0-9]+]], 1
+; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]]
+; PPC64: frsp 1, [[REG2]]
+; PPC64: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @foodl(double %X) #0 {
+entry:
+  %conv = fptosi double %X to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+
+; FPCVT-LABEL: @foodl
+; FPCVT: fctidz [[REG1:[0-9]+]], 1
+; FPCVT: fcfid 1, [[REG1]]
+; FPCVT: blr
+
+; PPC64-LABEL: @foodl
+; PPC64: fctidz [[REG1:[0-9]+]], 1
+; PPC64: fcfid 1, [[REG1]]
+; PPC64: blr
+}
+
+; Function Attrs: nounwind readnone
+define float @fooul(float %X) #0 {
+entry:
+  %conv = fptoui float %X to i64
+  %conv1 = uitofp i64 %conv to float
+  ret float %conv1
+
+; FPCVT-LABEL: @fooul
+; FPCVT: fctiduz [[REG1:[0-9]+]], 1
+; FPCVT: fcfidus 1, [[REG1]]
+; FPCVT: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @fooudl(double %X) #0 {
+entry:
+  %conv = fptoui double %X to i64
+  %conv1 = uitofp i64 %conv to double
+  ret double %conv1
+
+; FPCVT-LABEL: @fooudl
+; FPCVT: fctiduz [[REG1:[0-9]+]], 1
+; FPCVT: fcfidu 1, [[REG1]]
+; FPCVT: blr
+}
+
+attributes #0 = { nounwind readnone }
+
-- 
2.7.4