From 17020f96c79a0b28275681e16569c81dc369d77b Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Wed, 23 Jul 2014 14:57:52 +0000
Subject: [PATCH] [AArch64] Lower sdiv x, pow2 using add + select + shift.

The target-independent DAGcombiner will generate:
asr w1, X, #31            w1 = splat sign bit.
add X, X, w1, lsr #28     X = X + 0 or pow2-1
asr w0, X, #4             w0 = X/pow2

However, the add + shifts sequence is expensive, so generate:
add w0, X, #15            w0 = X + pow2-1
cmp X, wzr                X - 0
csel X, w0, X, lt         X = (X < 0) ? X + pow2-1 : X;
asr w0, X, #4             w0 = X/pow2

llvm-svn: 213758
---
 llvm/include/llvm/Target/TargetLowering.h       |  9 ++++
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp   | 35 ++++++++++--
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 48 +++++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h   |  3 ++
 llvm/test/CodeGen/AArch64/sdivpow2.ll           | 72 +++++++++++++++++++++++++
 5 files changed, 164 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sdivpow2.ll

diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index ea9a48e..b872f3d 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -2545,6 +2545,15 @@ public:
   SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                     bool IsAfterLegalization,
                     std::vector<SDNode *> *Created) const;
+  /// Target hook for lowering a signed divide by a power-of-2 constant.
+  /// Nodes built by an override are appended to \p Created when non-null.
+  /// The default returns an empty SDValue, in which case the DAG combiner
+  /// falls back to its generic expansion.
+  virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                SelectionDAG &DAG,
+                                std::vector<SDNode *> *Created) const {
+    return SDValue();
+  }
 
   //===--------------------------------------------------------------------===//
   // Legalization utility functions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 35d6256..3a292c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -304,6 +304,7 @@ namespace {
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
+    SDValue BuildSDIVPow2(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
@@ -2033,6 +2034,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
     if (TLI.isPow2DivCheap())
       return SDValue();
 
+    // Target-specific implementation of sdiv x, pow2.
+    SDValue Res = BuildSDIVPow2(N);
+    if (Res.getNode())
+      return Res;
+
     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
 
     // Splat the sign bit into the register
@@ -11482,9 +11488,9 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
 }
 
-/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number. See:
 ///
 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -11504,6 +11510,29 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   return S;
 }
 
+/// BuildSDIVPow2 - Given an ISD::SDIV node expressing a divide by constant
+/// power of 2, return a DAG expression to select that will generate the same
+/// value by right shifting. Returns an empty SDValue when the divisor is not a
+/// non-zero constant (or constant splat) or when the target hook produces no
+/// replacement.
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  // Avoid division by zero.
+  if (!C->getAPIntValue())
+    return SDValue();
+
+  std::vector<SDNode *> Built;
+  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+
+  // Queue the nodes the target created so the combiner revisits them.
+  for (SDNode *Node : Built)
+    AddToWorklist(Node);
+  return S;
+}
+
 /// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number. See:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4921826..ac4f206 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6382,6 +6382,54 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
   return performIntegerAbsCombine(N, DAG);
 }
 
+/// Lower (sdiv X, +/-pow2) on i32/i64 to add + cmp/csel + asr, followed by a
+/// negate when the divisor is negative. Intermediate nodes are appended to
+/// \p Created when it is non-null. Returns an empty SDValue for any other
+/// value type or divisor.
+SDValue
+AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                     SelectionDAG &DAG,
+                                     std::vector<SDNode *> *Created) const {
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  if ((VT != MVT::i32 && VT != MVT::i64) ||
+      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  unsigned Lg2 = Divisor.countTrailingZeros();
+  SDValue Zero = DAG.getConstant(0, VT);
+  // Shift a 64-bit one: Lg2 can exceed 31 for an i64 divisor, which would
+  // overflow the 32-bit int in (1 << Lg2).
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+
+  // Add (N0 < 0) ? Pow2 - 1 : 0;
+  SDValue CCVal;
+  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
+
+  if (Created) {
+    Created->push_back(Cmp.getNode());
+    Created->push_back(Add.getNode());
+    Created->push_back(CSel.getNode());
+  }
+
+  // Divide by pow2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  if (Created)
+    Created->push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
 static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const AArch64Subtarget *Subtarget) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index cb0b9ef..39ba372 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -424,6 +424,9 @@ private:
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                        std::vector<SDNode *> *Created) const override;
+
   ConstraintType
   getConstraintType(const std::string &Constraint) const override;
   unsigned getRegisterByName(const char* RegName, EVT VT) const override;
diff --git a/llvm/test/CodeGen/AArch64/sdivpow2.ll b/llvm/test/CodeGen/AArch64/sdivpow2.ll
new file mode 100644
index 0000000..6dee06d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sdivpow2.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+
+define i32 @test1(i32 %x) {
+; CHECK-LABEL: test1
+; CHECK: add w8, w0, #7
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: asr w0, w8, #3
+  %div = sdiv i32 %x, 8
+  ret i32 %div
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: test2
+; CHECK: add w8, w0, #7
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: neg w0, w8, asr #3
+  %div = sdiv i32 %x, -8
+  ret i32 %div
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: test3
+; CHECK: add w8, w0, #31
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: asr w0, w8, #5
+  %div = sdiv i32 %x, 32
+  ret i32 %div
+}
+
+define i64 @test4(i64 %x) {
+; CHECK-LABEL: test4
+; CHECK: add x8, x0, #7
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #3
+  %div = sdiv i64 %x, 8
+  ret i64 %div
+}
+
+define i64 @test5(i64 %x) {
+; CHECK-LABEL: test5
+; CHECK: add x8, x0, #7
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: neg x0, x8, asr #3
+  %div = sdiv i64 %x, -8
+  ret i64 %div
+}
+
+define i64 @test6(i64 %x) {
+; CHECK-LABEL: test6
+; CHECK: add x8, x0, #63
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #6
+  %div = sdiv i64 %x, 64
+  ret i64 %div
+}
+
+; The pow2-1 addend (2^48 - 1) needs more than 32 bits.
+define i64 @test7(i64 %x) {
+; CHECK-LABEL: test7
+; CHECK: orr [[REG:x[0-9]+]], xzr, #0xffffffffffff
+; CHECK: add x8, x0, [[REG]]
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #48
+  %div = sdiv i64 %x, 281474976710656
+  ret i64 %div
+}
-- 
2.7.4