From: Sanjay Patel Date: Tue, 28 Jul 2015 23:05:48 +0000 (+0000) Subject: fix TLI's combineRepeatedFPDivisors interface to return the minimum user threshold X-Git-Tag: studio-1.4~1335 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1dd15598cf6ca4338509c90ec2f93957ba3ee229;p=platform%2Fupstream%2Fllvm.git fix TLI's combineRepeatedFPDivisors interface to return the minimum user threshold This fix was suggested as part of D11345 and is part of fixing PR24141. With this change, we can avoid walking the uses of a divisor node if the target doesn't want the combineRepeatedFPDivisors transform in the first place. There is no functional change intended other than that. Differential Revision: http://reviews.llvm.org/D11531 llvm-svn: 243498 --- diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index d9d9f6f..bd23b74 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -2732,10 +2732,12 @@ public: return SDValue(); } - /// Indicate whether this target prefers to combine the given number of FDIVs - /// with the same divisor. - virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const { - return false; + /// Indicate whether this target prefers to combine FDIVs with the same + /// divisor. If the transform should never be done, return zero. If the + /// transform should be done, return the minimum number of divisor uses + /// that must exist. 
+ virtual unsigned combineRepeatedFPDivisors() const { + return 0; } /// Hooks for building estimates in place of slower divisions and square diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b06d533..de26516 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8247,23 +8247,29 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { if (!DAG.getTarget().Options.UnsafeFPMath) return SDValue(); + // Skip if current node is a reciprocal. SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - - // Skip if current node is a reciprocal. if (N0CFP && N0CFP->isExactlyValue(1.0)) return SDValue(); + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. SDValue N1 = N->getOperand(1); - SmallVector<SDNode *, 4> Users; + unsigned MinUses = TLI.combineRepeatedFPDivisors(); + if (!MinUses || N1->use_size() < MinUses) + return SDValue(); // Find all FDIV users of the same divisor. + SmallVector<SDNode *, 4> Users; for (auto *U : N1->uses()) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) Users.push_back(U); } - if (!TLI.combineRepeatedFPDivisors(Users.size())) + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. 
+ if (Users.size() < MinUses) return SDValue(); EVT VT = N->getValueType(0); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index be8696b..8a125f3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9427,10 +9427,10 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const { return true; } -bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { +unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const { // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are three or more FDIVs. - return NumUsers > 2; + return 3; } TargetLoweringBase::LegalizeTypeAction diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 92f38be..7820112 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -477,7 +477,7 @@ private: SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; unsigned getRegisterByName(const char* RegName, EVT VT, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ea5a663..e7bc903 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9131,7 +9131,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, return SDValue(); } -bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { +unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal 
estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default @@ -9144,12 +9144,12 @@ bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { // one FP pipeline) for three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: - return NumUsers > 2; + return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: - return NumUsers > 1; + return 2; } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 6e13533..cbd5add 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -853,7 +853,7 @@ namespace llvm { bool &UseOneConstNR) const override; SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; }; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a99cc20..de4506d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -13308,8 +13308,8 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, /// This is because we still need one division to calculate the reciprocal and /// then we need two multiplies by that reciprocal as replacements for the /// original divisions. 
-bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { - return NumUsers > 1; +unsigned X86TargetLowering::combineRepeatedFPDivisors() const { + return 2; } static bool isAllOnes(SDValue V) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3d058e8..18f823c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1124,7 +1124,7 @@ namespace llvm { unsigned &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; }; namespace X86 {