From a56f2822d0d3ff3ac1d289a8b7834d7f90415f6d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 19 Mar 2019 16:24:55 +0000 Subject: [PATCH] [SelectionDAG] Handle unary SelectPatternFlavor for ABS case in SelectionDAGBuilder::visitSelect These changes are related to PR37743 and include: SelectionDAGBuilder::visitSelect handles the unary SelectPatternFlavor::SPF_ABS case to build ABS node. Delete the redundant recognizer of the integer ABS pattern from the DAGCombiner. Add promoting the integer ABS node in the LegalizeIntegerType. Expand-based legalization of integer result for the ABS nodes. Expand-based legalization of ABS vector operations. Add some integer abs testcases for different typesizes for Thumb arch Add the custom ABS expanding and change the SAD pattern recognizer for X86 arch: The i64 result of the ABS is expanded to: tmp = (SRA, Hi, 31) Lo = (UADDO tmp, Lo) Hi = (XOR tmp, (ADDCARRY tmp, hi, Lo:1)) Lo = (XOR tmp, Lo) The "detectZextAbsDiff" function is changed for the recognition of pattern with the ABS node. Given a ABS node, detect the following pattern: (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))). Change integer abs testcases for codegen with the ABS node support for AArch64. Indicate that the ABS is legal for the i64 type when the NEON is supported. Change the integer abs testcases to show changing of codegen. Add combine and legalization of ABS nodes for Thumb arch. Extend 'matchSelectPattern' to recognize the ABS patterns with ICMP_SGE condition. For discussion, see https://bugs.llvm.org/show_bug.cgi?id=37743 Patch by: @ikulagin (Ivan Kulagin) Differential Revision: https://reviews.llvm.org/D49837 llvm-svn: 356468 --- llvm/lib/Analysis/ValueTracking.cpp | 4 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30 ------ .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 26 ++++++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 + .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 1 + .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 39 ++++++-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 54 +++++++++++ llvm/lib/Target/ARM/ARMISelLowering.h | 2 + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 104 ++++++++------------- llvm/test/CodeGen/AArch64/iabs.ll | 12 +-- 11 files changed, 167 insertions(+), 110 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 0012378..b566a70 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4932,6 +4932,10 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) return {SPF_ABS, SPNB_NA, false}; + // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X) + if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne)) + return {SPF_ABS, SPNB_NA, false}; + // (X NABS(X) // (-X NABS(X) if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d5c1deea..cb19c0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19047,36 +19047,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, getShiftAmountTy(Temp.getValueType()))); } - // Check to see if this is an integer abs. - // select_cc setg[te] X, 0, X, -X -> - // select_cc setgt X, -1, X, -X -> - // select_cc setl[te] X, 0, -X, X -> - // select_cc setlt X, 1, -X, X -> - // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C) { - ConstantSDNode *SubC = nullptr; - if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || - (N1C->isAllOnesValue() && CC == ISD::SETGT)) && - N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) - SubC = dyn_cast(N3.getOperand(0)); - else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || - (N1C->isOne() && CC == ISD::SETLT)) && - N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) - SubC = dyn_cast(N2.getOperand(0)); - - if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) { - SDLoc DL(N0); - SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0, - DAG.getConstant(CmpOpVT.getSizeInBits() - 1, - DL, - getShiftAmountTy(CmpOpVT))); - SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift); - AddToWorklist(Shift.getNode()); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift); - } - } - // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 1cc739b..ca0f0d6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -150,6 +150,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; case ISD::SMULFIX: case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: Res = PromoteIntRes_Atomic0(cast(N)); break; @@ -939,6 +940,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { return SDValue(Res.getNode(), 0); } +SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) { + SDValue Op0 = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0); +} + SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { // Promote the overflow bit trivially. if (ResNo == 1) @@ -1582,6 +1588,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; @@ -2342,6 +2349,25 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, IsOpaque); } +void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + + // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo) + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(0, dl, VT), N0); + SDValue NegLo, NegHi; + SplitInteger(Neg, NegLo, NegHi); + + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), + DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); + Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); + Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b195401..2273ed5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -347,6 +347,7 @@ private: SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); + SDValue PromoteIntRes_ABS(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -404,6 +405,7 @@ private: void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7da04e8..e25294c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -823,6 +823,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_ExtVecInRegOp(N, Lo, Hi); break; + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fb57f5d..3aedee8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3210,6 +3210,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) { ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; + bool IsUnaryAbs = false; + // Min/max matching is only viable if all output VTs are the same. if (is_splat(ValueVTs)) { EVT VT = ValueVTs[0]; @@ -3270,10 +3272,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) { break; } break; + case SPF_ABS: + IsUnaryAbs = true; + Opc = ISD::ABS; + break; + case SPF_NABS: + // TODO: we need to produce sub(0, abs(X)). default: break; } - if (Opc != ISD::DELETED_NODE && + if (!IsUnaryAbs && Opc != ISD::DELETED_NODE && (TLI.isOperationLegalOrCustom(Opc, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && @@ -3286,15 +3294,30 @@ void SelectionDAGBuilder::visitSelect(const User &I) { RHSVal = getValue(RHS); BaseOps = {}; } + + if (IsUnaryAbs) { + OpCode = Opc; + LHSVal = getValue(LHS); + BaseOps = {}; + } } - for (unsigned i = 0; i != NumValues; ++i) { - SmallVector Ops(BaseOps.begin(), BaseOps.end()); - Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); - Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); - Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), - Ops); + if (IsUnaryAbs) { + for (unsigned i = 0; i != NumValues; ++i) { + Values[i] = + DAG.getNode(OpCode, getCurSDLoc(), + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), + SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + } + } else { + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); + Values[i] = DAG.getNode( + OpCode, getCurSDLoc(), + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); + } } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9748e20..c148282 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1207,6 +1207,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget->getPrefLoopAlignment()); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); + + if (Subtarget->isThumb() || Subtarget->isThumb2()) + setTargetDAGCombine(ISD::ABS); } bool ARMTargetLowering::useSoftFloat() const { @@ -8180,6 +8183,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::INTRINSIC_WO_CHAIN: return ReplaceLongIntrinsic(N, Results, DAG); + case ISD::ABS: + lowerABS(N, Results, DAG); + return ; + } if (Res.getNode()) Results.push_back(Res); @@ -10377,6 +10384,19 @@ static SDValue PerformAddeSubeCombine(SDNode *N, return SDValue(); } +static SDValue PerformABSCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SDValue res; + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (!TLI.expandABS(N, res, DAG)) + return SDValue(); + + return res; +} + /// PerformADDECombine - Target-specific dag combine transform from /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL @@ -12884,6 +12904,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); @@ -14395,6 +14416,39 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDLoc(Op)).first; } +void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const { + EVT VT = N->getValueType(0); + assert(VT == MVT::i64 && "Unexpected type (!= i64) on ABS."); + MVT HalfT = MVT::i32; + SDLoc dl(N); + SDValue Hi, Lo, Tmp; + + if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) || + !isOperationLegalOrCustom(ISD::UADDO, HalfT)) + return ; + + unsigned OpTypeBits = HalfT.getScalarSizeInBits(); + SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); + + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(0, dl, HalfT)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(1, dl, HalfT)); + + Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi, + DAG.getConstant(OpTypeBits - 1, dl, + getShiftAmountTy(HalfT, DAG.getDataLayout()))); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, + SDValue(Lo.getNode(), 1)); + Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); + Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); + + Results.push_back(Lo); + Results.push_back(Hi); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index cc5c310..9728383 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -698,6 +698,8 @@ class VectorType; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + void lowerABS(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d10f8bf..2b10036 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -551,7 +551,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); - setOperationAction(ISD::ABS, VT, Custom); + if (VT.getSizeInBits() == 128) + setOperationAction(ISD::ABS, VT, Custom); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ba8848b..04c58b4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -189,10 +189,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Integer absolute. if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); - setOperationAction(ISD::ABS , MVT::i32 , Custom); - if (Subtarget.is64Bit()) - setOperationAction(ISD::ABS , MVT::i64 , Custom); + setOperationAction(ISD::ABS , MVT::i32 , Custom); } + setOperationAction(ISD::ABS , MVT::i64 , Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -26790,6 +26789,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res); return; } + case ISD::ABS: { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = N->getValueType(0); + assert(VT == MVT::i64 && "Unexpected type (!= i64) on ABS."); + MVT HalfT = MVT::i32; + SDValue Lo, Hi, Tmp; + SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); + + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(0, dl, HalfT)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(1, dl, HalfT)); + Tmp = DAG.getNode( + ISD::SRA, dl, HalfT, Hi, + DAG.getConstant(HalfT.getSizeInBits() - 1, dl, + TLI.getShiftAmountTy(HalfT, DAG.getDataLayout()))); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, + SDValue(Lo.getNode(), 1)); + Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); + Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); + Results.push_back(Lo); + Results.push_back(Hi); + return; + } case ISD::SETCC: { // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when // setCC result type is v2i1 because type legalzation will end up with @@ -34010,66 +34034,16 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// Given a select, detect the following pattern: -// 1: %2 = zext %0 to -// 2: %3 = zext %1 to -// 3: %4 = sub nsw %2, %3 -// 4: %5 = icmp sgt %4, [0 x N] or [-1 x N] -// 5: %6 = sub nsw zeroinitializer, %4 -// 6: %7 = select %5, %4, %6 +// Given a ABS node, detect the following pattern: +// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))). // This is useful as it is the input into a SAD pattern. -static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0, - SDValue &Op1) { - // Check the condition of the select instruction is greater-than. - SDValue SetCC = Select->getOperand(0); - if (SetCC.getOpcode() != ISD::SETCC) - return false; - ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); - if (CC != ISD::SETGT && CC != ISD::SETLT) - return false; - - SDValue SelectOp1 = Select->getOperand(1); - SDValue SelectOp2 = Select->getOperand(2); - - // The following instructions assume SelectOp1 is the subtraction operand - // and SelectOp2 is the negation operand. - // In the case of SETLT this is the other way around. - if (CC == ISD::SETLT) - std::swap(SelectOp1, SelectOp2); - - // The second operand of the select should be the negation of the first - // operand, which is implemented as 0 - SelectOp1. - if (!(SelectOp2.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) && - SelectOp2.getOperand(1) == SelectOp1)) - return false; - - // The first operand of SetCC is the first operand of the select, which is the - // difference between the two input vectors. - if (SetCC.getOperand(0) != SelectOp1) - return false; - - // In SetLT case, The second operand of the comparison can be either 1 or 0. - APInt SplatVal; - if ((CC == ISD::SETLT) && - !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) && - SplatVal.isOneValue()) || - (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode())))) - return false; - - // In SetGT case, The second operand of the comparison can be either -1 or 0. - if ((CC == ISD::SETGT) && - !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) || - ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode()))) - return false; - - // The first operand of the select is the difference between the two input - // vectors. - if (SelectOp1.getOpcode() != ISD::SUB) +static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) { + SDValue AbsOp1 = Abs->getOperand(0); + if (AbsOp1.getOpcode() != ISD::SUB) return false; - Op0 = SelectOp1.getOperand(0); - Op1 = SelectOp1.getOperand(1); + Op0 = AbsOp1.getOperand(0); + Op1 = AbsOp1.getOperand(1); // Check if the operands of the sub are zero-extended from vectors of i8. if (Op0.getOpcode() != ISD::ZERO_EXTEND || @@ -34305,7 +34279,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, // If there was a match, we want Root to be a select that is the root of an // abs-diff pattern. - if (!Root || (Root.getOpcode() != ISD::VSELECT)) + if (!Root || Root.getOpcode() != ISD::ABS) return SDValue(); // Check whether we have an abs-diff pattern feeding into the select. @@ -41644,10 +41618,10 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, return SDValue(); // We know N is a reduction add, which means one of its operands is a phi. - // To match SAD, we need the other operand to be a vector select. - if (Op0.getOpcode() != ISD::VSELECT) + // To match SAD, we need the other operand to be a ABS. + if (Op0.getOpcode() != ISD::ABS) std::swap(Op0, Op1); - if (Op0.getOpcode() != ISD::VSELECT) + if (Op0.getOpcode() != ISD::ABS) return SDValue(); auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) { @@ -41686,7 +41660,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, Op0 = BuildPSADBW(SadOp0, SadOp1); // It's possible we have a sad on the other side too. - if (Op1.getOpcode() == ISD::VSELECT && + if (Op1.getOpcode() == ISD::ABS && detectZextAbsDiff(Op1, SadOp0, SadOp1)) { Op1 = BuildPSADBW(SadOp0, SadOp1); } diff --git a/llvm/test/CodeGen/AArch64/iabs.ll b/llvm/test/CodeGen/AArch64/iabs.ll index a29c157..dbcd922 100644 --- a/llvm/test/CodeGen/AArch64/iabs.ll +++ b/llvm/test/CodeGen/AArch64/iabs.ll @@ -3,9 +3,9 @@ define i8 @test_i8(i8 %a) nounwind { ; CHECK-LABEL: test_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sbfx w8, w0, #7, #1 -; CHECK-NEXT: add w9, w0, w8 -; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %tmp1neg = sub i8 0, %a %b = icmp sgt i8 %a, -1 @@ -16,9 +16,9 @@ define i8 @test_i8(i8 %a) nounwind { define i16 @test_i16(i16 %a) nounwind { ; CHECK-LABEL: test_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sbfx w8, w0, #15, #1 -; CHECK-NEXT: add w9, w0, w8 -; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %tmp1neg = sub i16 0, %a %b = icmp sgt i16 %a, -1 -- 2.7.4