From 4d559837e887c278d7c27274f4f6b1b78b97c00d Mon Sep 17 00:00:00 2001 From: Ali Sedaghati Date: Wed, 18 Aug 2021 11:09:04 -0700 Subject: [PATCH] [NFC] factor out unrolling decision logic Decoupling the unrolling logic into three different functions. The shouldPragmaUnroll() covers the 1st and 2nd priorities of the previous code, the shouldFullUnroll() covers the 3rd, and the shouldPartialUnroll() covers the 5th. The output of each function, Optional, could be a value for UP.Count, which means unrolling factor has been set, or None, which means decision hasn't been made yet and should try the next priority. Reviewed By: mtrofin, jdoerfert Differential Revision: https://reviews.llvm.org/D106001 --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 310 +++++++++++++++++--------- 1 file changed, 199 insertions(+), 111 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 34dad2a..a5a1302 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -319,6 +320,16 @@ struct EstimatedUnrollCost { unsigned RolledDynamicCost; }; +struct PragmaInfo { + PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU) + : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC), + PragmaEnableUnroll(PEU) {} + const bool UserUnrollCount; + const bool PragmaFullUnroll; + const unsigned PragmaCount; + const bool PragmaEnableUnroll; +}; + } // end anonymous namespace /// Figure out if the loop is worth full unrolling. @@ -747,13 +758,132 @@ public: // Returns loop size estimation for unrolled loop, given the unrolling // configuration specified by UP. - uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) { + uint64_t + getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, + const unsigned CountOverwrite = 0) const { assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); - return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns; + if (CountOverwrite) + return static_cast(LoopSize - UP.BEInsns) * CountOverwrite + + UP.BEInsns; + else + return static_cast(LoopSize - UP.BEInsns) * UP.Count + + UP.BEInsns; } }; +static Optional +shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, + const unsigned TripMultiple, const unsigned TripCount, + const UnrollCostEstimator UCE, + const TargetTransformInfo::UnrollingPreferences &UP) { + + // Using unroll pragma + // 1st priority is unroll count set by "unroll-count" option. + + if (PInfo.UserUnrollCount) { + if (UP.AllowRemainder && + UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold) + return (unsigned)UnrollCount; + } + + // 2nd priority is unroll count set by pragma. + if (PInfo.PragmaCount > 0) { + if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) && + UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold) + return PInfo.PragmaCount; + } + + if (PInfo.PragmaFullUnroll && TripCount != 0) { + if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold) + return TripCount; + } + // if didn't return until here, should continue to other priorties + return None; +} + +static Optional shouldFullUnroll( + Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, + ScalarEvolution &SE, const SmallPtrSetImpl &EphValues, + const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, + const TargetTransformInfo::UnrollingPreferences &UP) { + + if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { + // When computing the unrolled size, note that BEInsns are not replicated + // like the rest of the loop body. + if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { + return FullUnrollTripCount; + + } else { + // The loop isn't that small, but we still can fully unroll it if that + // helps to remove a significant number of instructions. + // To check that, run additional analysis on the loop. + if (Optional Cost = analyzeLoopUnrollCost( + L, FullUnrollTripCount, DT, SE, EphValues, TTI, + UP.Threshold * UP.MaxPercentThresholdBoost / 100, + UP.MaxIterationsCountToAnalyze)) { + unsigned Boost = + getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); + if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { + return FullUnrollTripCount; + } + } + } + } + return None; +} + +static Optional +shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, + const UnrollCostEstimator UCE, + const TargetTransformInfo::UnrollingPreferences &UP) { + + unsigned count = UP.Count; + if (TripCount) { + if (!UP.Partial) { + LLVM_DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); + count = 0; + return count; + } + if (count == 0) + count = TripCount; + if (UP.PartialThreshold != NoThreshold) { + // Reduce unroll count to be modulo of TripCount for partial unrolling. + if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) + count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); + if (count > UP.MaxCount) + count = UP.MaxCount; + while (count != 0 && TripCount % count != 0) + count--; + if (UP.AllowRemainder && count <= 1) { + // If there is no Count that is modulo of TripCount, set Count to + // largest power-of-two factor that satisfies the threshold limit. + // As we'll create fixup loop, do the type of unrolling only if + // remainder loop is allowed. + count = UP.DefaultUnrollRuntimeCount; + while (count != 0 && + UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) + count >>= 1; + } + if (count < 2) { + count = 0; + } + } else { + count = TripCount; + } + if (count > UP.MaxCount) + count = UP.MaxCount; + + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n"); + + return count; + } + + // if didn't return until here, should continue to other priorties + return None; +} // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. // Unless IgnoreUser is true, will also use metadata and command-line options @@ -771,7 +901,18 @@ bool llvm::computeUnrollCount( TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { UnrollCostEstimator UCE(*L, LoopSize); + Optional UnrollFactor; + + const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; + const bool PragmaFullUnroll = hasUnrollFullPragma(L); + const unsigned PragmaCount = unrollCountPragmaValue(L); + const bool PragmaEnableUnroll = hasUnrollEnablePragma(L); + + const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || + PragmaEnableUnroll || UserUnrollCount; + PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount, + PragmaEnableUnroll); // Use an explicit peel count that has been specified for testing. In this // case it's not permitted to also specify an explicit unroll count. if (PP.PeelCount) { @@ -783,47 +924,29 @@ bool llvm::computeUnrollCount( UP.Runtime = false; return true; } - // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. - bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; - if (UserUnrollCount) { - UP.Count = UnrollCount; - UP.AllowExpensiveTripCount = true; - UP.Force = true; - if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold) - return true; - } - // 2nd priority is unroll count set by pragma. - unsigned PragmaCount = unrollCountPragmaValue(L); - if (PragmaCount > 0) { - UP.Count = PragmaCount; - UP.Runtime = true; - UP.AllowExpensiveTripCount = true; - UP.Force = true; - if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) && - UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) - return true; - } - bool PragmaFullUnroll = hasUnrollFullPragma(L); - if (PragmaFullUnroll && TripCount != 0) { - UP.Count = TripCount; - if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) - return false; - } + UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP); - bool PragmaEnableUnroll = hasUnrollEnablePragma(L); - bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || - PragmaEnableUnroll || UserUnrollCount; - - if (ExplicitUnroll && TripCount != 0) { - // If the loop has an unrolling pragma, we want to be more aggressive with - // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold - // value which is larger than the default limits. - UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); - UP.PartialThreshold = - std::max(UP.PartialThreshold, PragmaUnrollThreshold); + if (UnrollFactor) { + UP.Count = *UnrollFactor; + + if (UserUnrollCount || (PragmaCount > 0)) { + UP.AllowExpensiveTripCount = true; + UP.Force = true; + } + UP.Runtime |= (PragmaCount > 0); + return ExplicitUnroll; + } else { + if (ExplicitUnroll && TripCount != 0) { + // If the loop has an unrolling pragma, we want to be more aggressive with + // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold + // value which is larger than the default limits. + UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); + UP.PartialThreshold = + std::max(UP.PartialThreshold, PragmaUnrollThreshold); + } } // 3rd priority is full unroll count. @@ -853,28 +976,20 @@ bool llvm::computeUnrollCount( unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; UP.Count = FullUnrollTripCount; - if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { - // When computing the unrolled size, note that BEInsns are not replicated - // like the rest of the loop body. - if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - return ExplicitUnroll; - } else { - // The loop isn't that small, but we still can fully unroll it if that - // helps to remove a significant number of instructions. - // To check that, run additional analysis on the loop. - if (Optional Cost = analyzeLoopUnrollCost( - L, FullUnrollTripCount, DT, SE, EphValues, TTI, - UP.Threshold * UP.MaxPercentThresholdBoost / 100, - UP.MaxIterationsCountToAnalyze)) { - unsigned Boost = - getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); - if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - return ExplicitUnroll; - } - } - } + + UnrollFactor = + shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP); + + // if shouldFullUnroll can do the unrolling, some side parameteres should be + // set + if (UnrollFactor) { + UP.Count = *UnrollFactor; + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); + TripCount = FullUnrollTripCount; + TripMultiple = UP.UpperBound ? 1 : TripMultiple; + return ExplicitUnroll; + } else { + UP.Count = FullUnrollTripCount; } // 4th priority is loop peeling. @@ -885,39 +1000,31 @@ bool llvm::computeUnrollCount( return ExplicitUnroll; } + // Before starting partial unrolling, set up.partial to true, + // if user explicitly asked for unrolling + if (TripCount) + UP.Partial |= ExplicitUnroll; + // 5th priority is partial unrolling. // Try partial unroll only when TripCount could be statically calculated. - if (TripCount) { - UP.Partial |= ExplicitUnroll; - if (!UP.Partial) { - LLVM_DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - UP.Count = 0; - return false; - } - if (UP.Count == 0) - UP.Count = TripCount; + UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP); + + if (UnrollFactor) { + UP.Count = *UnrollFactor; + + if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + UP.Count != TripCount) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "FullUnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll pragma " + "because " + "unrolled size is too large."; + }); + if (UP.PartialThreshold != NoThreshold) { - // Reduce unroll count to be modulo of TripCount for partial unrolling. - if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) - UP.Count = - (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / - (LoopSize - UP.BEInsns); - if (UP.Count > UP.MaxCount) - UP.Count = UP.MaxCount; - while (UP.Count != 0 && TripCount % UP.Count != 0) - UP.Count--; - if (UP.AllowRemainder && UP.Count <= 1) { - // If there is no Count that is modulo of TripCount, set Count to - // largest power-of-two factor that satisfies the threshold limit. - // As we'll create fixup loop, do the type of unrolling only if - // remainder loop is allowed. - UP.Count = UP.DefaultUnrollRuntimeCount; - while (UP.Count != 0 && - UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) - UP.Count >>= 1; - } - if (UP.Count < 2) { + if (UP.Count == 0) { if (PragmaEnableUnroll) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, @@ -927,25 +1034,8 @@ bool llvm::computeUnrollCount( "pragma " "because unrolled size is too large."; }); - UP.Count = 0; } - } else { - UP.Count = TripCount; } - if (UP.Count > UP.MaxCount) - UP.Count = UP.MaxCount; - if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && - UP.Count != TripCount) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "FullUnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll pragma " - "because " - "unrolled size is too large."; - }); - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count - << "\n"); return ExplicitUnroll; } assert(TripCount == 0 && @@ -982,8 +1072,6 @@ bool llvm::computeUnrollCount( UP.AllowExpensiveTripCount = true; } } - - // Reduce count based on the type of unrolling and the threshold values. UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; if (!UP.Runtime) { LLVM_DEBUG( @@ -1018,7 +1106,7 @@ bool llvm::computeUnrollCount( using namespace ore; - if (PragmaCount > 0 && !UP.AllowRemainder) + if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "DifferentUnrollCountFromDirected", -- 2.7.4