#include <cassert>
#include <cstdint>
#include <limits>
-#include <optional>
#include <string>
#include <tuple>
#include <utility>
unsigned RolledDynamicCost;
};
-struct PragmaInfo {
- PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
- : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
- PragmaEnableUnroll(PEU) {}
- const bool UserUnrollCount;
- const bool PragmaFullUnroll;
- const unsigned PragmaCount;
- const bool PragmaEnableUnroll;
-};
-
} // end anonymous namespace
/// Figure out if the loop is worth full unrolling.
// Returns loop size estimation for unrolled loop, given the unrolling
// configuration specified by UP.
- uint64_t
- getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
- const unsigned CountOverwrite = 0) const {
+ uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
assert(LoopSize >= UP.BEInsns &&
"LoopSize should not be less than BEInsns!");
- if (CountOverwrite)
- return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
- UP.BEInsns;
- else
- return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
- UP.BEInsns;
+ return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
}
};
-static Optional<unsigned>
-shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
- const unsigned TripMultiple, const unsigned TripCount,
- const UnrollCostEstimator UCE,
- const TargetTransformInfo::UnrollingPreferences &UP) {
-
- // Using unroll pragma
- // 1st priority is unroll count set by "unroll-count" option.
-
- if (PInfo.UserUnrollCount) {
- if (UP.AllowRemainder &&
- UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
- return (unsigned)UnrollCount;
- }
-
- // 2nd priority is unroll count set by pragma.
- if (PInfo.PragmaCount > 0) {
- if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
- UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
- return PInfo.PragmaCount;
- }
-
- if (PInfo.PragmaFullUnroll && TripCount != 0) {
- if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
- return TripCount;
- }
- // if didn't return until here, should continue to other priorties
- return None;
-}
-
-static Optional<unsigned> shouldFullUnroll(
- Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
- ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
- const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
- const TargetTransformInfo::UnrollingPreferences &UP) {
-
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
- // When computing the unrolled size, note that BEInsns are not replicated
- // like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
- return FullUnrollTripCount;
-
- } else {
- // The loop isn't that small, but we still can fully unroll it if that
- // helps to remove a significant number of instructions.
- // To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100,
- UP.MaxIterationsCountToAnalyze)) {
- unsigned Boost =
- getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- return FullUnrollTripCount;
- }
- }
- }
- }
- return None;
-}
-
-static Optional<unsigned>
-shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
- const UnrollCostEstimator UCE,
- const TargetTransformInfo::UnrollingPreferences &UP) {
-
- unsigned count = UP.Count;
- if (TripCount) {
- if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- count = 0;
- return count;
- }
- if (count == 0)
- count = TripCount;
- if (UP.PartialThreshold != NoThreshold) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
- count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
- if (count > UP.MaxCount)
- count = UP.MaxCount;
- while (count != 0 && TripCount % count != 0)
- count--;
- if (UP.AllowRemainder && count <= 1) {
- // If there is no Count that is modulo of TripCount, set Count to
- // largest power-of-two factor that satisfies the threshold limit.
- // As we'll create fixup loop, do the type of unrolling only if
- // remainder loop is allowed.
- count = UP.DefaultUnrollRuntimeCount;
- while (count != 0 &&
- UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
- count >>= 1;
- }
- if (count < 2) {
- count = 0;
- }
- } else {
- count = TripCount;
- }
- if (count > UP.MaxCount)
- count = UP.MaxCount;
-
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
-
- return count;
- }
-
- // if didn't return until here, should continue to other priorties
- return None;
-}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
- Optional<unsigned> UnrollFactor;
-
- const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
- const bool PragmaFullUnroll = hasUnrollFullPragma(L);
- const unsigned PragmaCount = unrollCountPragmaValue(L);
- const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
-
- const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
- PragmaEnableUnroll || UserUnrollCount;
- PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
- PragmaEnableUnroll);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
UP.Runtime = false;
return true;
}
+
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
- // 2nd priority is unroll count set by pragma.
- UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
+ bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+ if (UserUnrollCount) {
+ UP.Count = UnrollCount;
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+ return true;
+ }
- if (UnrollFactor) {
- UP.Count = *UnrollFactor;
+ // 2nd priority is unroll count set by pragma.
+ unsigned PragmaCount = unrollCountPragmaValue(L);
+ if (PragmaCount > 0) {
+ UP.Count = PragmaCount;
+ UP.Runtime = true;
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
+ UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+ return true;
+ }
+ bool PragmaFullUnroll = hasUnrollFullPragma(L);
+ if (PragmaFullUnroll && TripCount != 0) {
+ UP.Count = TripCount;
+ if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+ return false;
+ }
- if (UserUnrollCount || (PragmaCount > 0)) {
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- }
- UP.Runtime |= (PragmaCount > 0);
- return ExplicitUnroll;
- } else {
- if (ExplicitUnroll && TripCount != 0) {
- // If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
- // value which is larger than the default limits.
- UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
- UP.PartialThreshold =
- std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
- }
+ bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+ bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+ PragmaEnableUnroll || UserUnrollCount;
+
+ if (ExplicitUnroll && TripCount != 0) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+ // value which is larger than the default limits.
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
}
// 3rd priority is full unroll count.
unsigned FullUnrollTripCount =
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
-
- UnrollFactor =
- shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
-
- // if shouldFullUnroll can do the unrolling, some side parameteres should be
- // set
- if (UnrollFactor) {
- UP.Count = *UnrollFactor;
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- TripCount = FullUnrollTripCount;
- TripMultiple = UP.UpperBound ? 1 : TripMultiple;
- return ExplicitUnroll;
- } else {
- UP.Count = FullUnrollTripCount;
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+ return ExplicitUnroll;
+ } else {
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+ return ExplicitUnroll;
+ }
+ }
+ }
}
// 4th priority is loop peeling.
return ExplicitUnroll;
}
- // Before starting partial unrolling, set up.partial to true,
- // if user explicitly asked for unrolling
- if (TripCount)
- UP.Partial |= ExplicitUnroll;
-
// 5th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
- UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
-
- if (UnrollFactor) {
- UP.Count = *UnrollFactor;
-
- if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
- UP.Count != TripCount)
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE,
- "FullUnrollAsDirectedTooLarge",
- L->getStartLoc(), L->getHeader())
- << "Unable to fully unroll loop as directed by unroll pragma "
- "because "
- "unrolled size is too large.";
- });
-
+ if (TripCount) {
+ UP.Partial |= ExplicitUnroll;
+ if (!UP.Partial) {
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ UP.Count = 0;
+ return false;
+ }
+ if (UP.Count == 0)
+ UP.Count = TripCount;
if (UP.PartialThreshold != NoThreshold) {
- if (UP.Count == 0) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+ UP.Count =
+ (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
+ while (UP.Count != 0 && TripCount % UP.Count != 0)
+ UP.Count--;
+ if (UP.AllowRemainder && UP.Count <= 1) {
+ // If there is no Count that is modulo of TripCount, set Count to
+ // largest power-of-two factor that satisfies the threshold limit.
+ // As we'll create fixup loop, do the type of unrolling only if
+ // remainder loop is allowed.
+ UP.Count = UP.DefaultUnrollRuntimeCount;
+ while (UP.Count != 0 &&
+ UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+ UP.Count >>= 1;
+ }
+ if (UP.Count < 2) {
if (PragmaEnableUnroll)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"pragma "
"because unrolled size is too large.";
});
+ UP.Count = 0;
}
+ } else {
+ UP.Count = TripCount;
}
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ UP.Count != TripCount)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma "
+ "because "
+ "unrolled size is too large.";
+ });
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+ << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
UP.AllowExpensiveTripCount = true;
}
}
+
+ // Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(
using namespace ore;
- if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
+ if (PragmaCount > 0 && !UP.AllowRemainder)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"DifferentUnrollCountFromDirected",