[PowerPC] Implement BuildSDIVPow2, lower i64 pow2 sdiv using sradi

author Hal Finkel <hfinkel@anl.gov>

Thu, 11 Dec 2014 18:37:52 +0000 (18:37 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Thu, 11 Dec 2014 18:37:52 +0000 (18:37 +0000)
author Hal Finkel <hfinkel@anl.gov>
Thu, 11 Dec 2014 18:37:52 +0000 (18:37 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Thu, 11 Dec 2014 18:37:52 +0000 (18:37 +0000)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

index 595052e..48c0388 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1041,35 +1041,26 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
                                    MVT::Other, N->getOperand(0));
    }
  
-  case ISD::SDIV: {
-    // FIXME: since this depends on the setting of the carry flag from the srawi
-    //        we should really be making notes about that for the scheduler.
-    // FIXME: It sure would be nice if we could cheaply recognize the
-    //        srl/add/sra pattern the dag combiner will generate for this as
-    //        sra/addze rather than having to handle sdiv ourselves.  oh well.
-    unsigned Imm;
-    if (isInt32Immediate(N->getOperand(1), Imm)) {
-      SDValue N0 = N->getOperand(0);
-      if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
-        SDNode *Op =
-          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
-                                 N0, getI32Imm(Log2_32(Imm)));
-        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
-                                    SDValue(Op, 0), SDValue(Op, 1));
-      } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
-        SDNode *Op =
-          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
-                                 N0, getI32Imm(Log2_32(-Imm)));
-        SDValue PT =
-          SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
-                                         SDValue(Op, 0), SDValue(Op, 1)),
-                    0);
-        return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
-      }
+  case PPCISD::SRA_ADDZE: {
+    SDValue N0 = N->getOperand(0);
+    SDValue ShiftAmt =
+      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
+                                  getConstantIntValue(), N->getValueType(0));
+    if (N->getValueType(0) == MVT::i64) {
+      SDNode *Op =
+        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
+                               N0, ShiftAmt);
+      return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
+                                  SDValue(Op, 0), SDValue(Op, 1));
+    } else {
+      assert(N->getValueType(0) == MVT::i32 &&
+             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
+      SDNode *Op =
+        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+                               N0, ShiftAmt);
+      return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+                                  SDValue(Op, 0), SDValue(Op, 1));
      }
-
-    // Other cases are autogenerated.
-    break;
    }
  
    case ISD::LOAD: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

index fbcc34e..29bbad4 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -58,8 +58,6 @@ extern cl::opt<bool> ANDIGlueBug;
  PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
      : TargetLowering(TM),
        Subtarget(*TM.getSubtargetImpl()) {
-  setPow2SDivIsCheap();
-
    // Use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
@@ -8931,6 +8929,36 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
    return SDValue();
  }
  
+SDValue
+PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                  SelectionDAG &DAG,
+                                  std::vector<SDNode *> *Created) const {
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  if ((VT != MVT::i32 && VT != MVT::i64) ||
+      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+
+  bool IsNegPow2 = (-Divisor).isPowerOf2();
+  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
+  SDValue ShiftAmt = DAG.getConstant(Lg2, VT);
+
+  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
+  if (Created)
+    Created->push_back(Op.getNode());
+
+  if (IsNegPow2) {
+    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
+    if (Created)
+      Created->push_back(Op.getNode());
+  }
+
+  return Op;
+}
+
  //===----------------------------------------------------------------------===//
  // Inline Assembly Support
  //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h

index 9842f45..2564d3d 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -94,6 +94,12 @@ namespace llvm {
        /// code.
        SRL, SRA, SHL,
  
+      /// The combination of sra[wd]i and addze used to implemented signed
+      /// integer division by a power of 2. The first operand is the dividend,
+      /// and the second is the constant shift amount (representing the
+      /// divisor).
+      SRA_ADDZE,
+
        /// CALL - A direct function call.
        /// CALL_NOP is a call with the special NOP which follows 64-bit
        /// SVR4 calls.
@@ -425,6 +431,9 @@ namespace llvm {
  
      SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  
+    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                          std::vector<SDNode *> *Created) const override;
+
      unsigned getRegisterByName(const char* RegName, EVT VT) const override;
  
      void computeKnownBitsForTargetNode(const SDValue Op,
diff --git a/llvm/test/CodeGen/PowerPC/sdiv-pow2.ll b/llvm/test/CodeGen/PowerPC/sdiv-pow2.ll

new file mode 100644 (file)

index 0000000..c6330a6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sdiv-pow2.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo4(i32 signext %a) #0 {
+entry:
+  %div = sdiv i32 %a, 8
+  ret i32 %div
+
+; CHECK-LABEL @foo4
+; CHECK: srawi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: extsw 3, [[REG2]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @foo8(i64 %a) #0 {
+entry:
+  %div = sdiv i64 %a, 8
+  ret i64 %div
+
+; CHECK-LABEL @foo8
+; CHECK: sradi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze 3, [[REG1]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo4n(i32 signext %a) #0 {
+entry:
+  %div = sdiv i32 %a, -8
+  ret i32 %div
+
+; CHECK-LABEL: @foo4n
+; CHECK: srawi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: neg [[REG3:[0-9]+]], [[REG2]]
+; CHECK: extsw 3, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @foo8n(i64 %a) #0 {
+entry:
+  %div = sdiv i64 %a, -8
+  ret i64 %div
+
+; CHECK-LABEL: @foo8n
+; CHECK: sradi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: neg 3, [[REG2]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
author	Hal Finkel <hfinkel@anl.gov>
	Thu, 11 Dec 2014 18:37:52 +0000 (18:37 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Thu, 11 Dec 2014 18:37:52 +0000 (18:37 +0000)
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
llvm/test/CodeGen/PowerPC/sdiv-pow2.ll	[new file with mode: 0644]	patch \| blob