From 051db7d838484850784f1d29b3bc18477a3f3c98 Mon Sep 17 00:00:00 2001 From: Pierre Gousseau Date: Tue, 16 Aug 2016 13:53:53 +0000 Subject: [PATCH] [x86] Refactor a PowerPC specific ctlz/srl transformation (NFC). Following the discussion on D22038, this refactors a PowerPC specific setcc -> srl(ctlz) transformation so it can be used by other targets. Differential Revision: https://reviews.llvm.org/D23445 llvm-svn: 278799 --- llvm/include/llvm/Target/TargetLowering.h | 11 +++++++++++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 25 ++++++++++++++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 16 +++------------ llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 ++++ 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index baeb487..879bc47 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -322,6 +322,11 @@ public: return false; } + /// \brief Return true if ctlz instruction is fast. + virtual bool isCtlzFast() const { + return false; + } + /// Return true if it is safe to transform an integer-domain bitwise operation /// into the equivalent floating-point operation. This should be set to true /// if the target has IEEE-754-compliant fabs/fneg operations for the input @@ -3053,6 +3058,12 @@ public: virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; + // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) + // If we're comparing for equality to zero and isCtlzFast is true, expose the + // fact that this can be implemented as a ctlz/srl pair, so that the dag + // combiner can fold the new nodes. + SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; + private: SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 478bebe..0411a4a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3562,3 +3562,28 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, "Emulated TLS must have zero offset in GlobalAddressSDNode"); return CallResult.first; } + +SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, + SelectionDAG &DAG) const { + assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node."); + if (!isCtlzFast()) + return SDValue(); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + SDLoc dl(Op); + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + if (C->isNullValue() && CC == ISD::SETEQ) { + EVT VT = Op.getOperand(0).getValueType(); + SDValue Zext = Op.getOperand(0); + if (VT.bitsLT(MVT::i32)) { + VT = MVT::i32; + Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); + } + unsigned Log2b = Log2_32(VT.getSizeInBits()); + SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); + SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, + DAG.getConstant(Log2b, dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); + } + } + return SDValue(); +} diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2e1df66..e2cc921 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2362,20 +2362,10 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. + if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) + return V; + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { - if (C->isNullValue() && CC == ISD::SETEQ) { - EVT VT = Op.getOperand(0).getValueType(); - SDValue Zext = Op.getOperand(0); - if (VT.bitsLT(MVT::i32)) { - VT = MVT::i32; - Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); - } - unsigned Log2b = Log2_32(VT.getSizeInBits()); - SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); - SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, - DAG.getConstant(Log2b, dl, MVT::i32)); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); - } // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index c12f9c6..13b8637 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -492,6 +492,10 @@ namespace llvm { return true; } + bool isCtlzFast() const override { + return true; + } + bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && -- 2.7.4