/// The recursive computation is memoized into the provided DT-indexed cost map
/// to allow querying it for most nodes in the domtree without it becoming
/// quadratic.
-static int
-computeDomSubtreeCost(DomTreeNode &N,
- const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
- SmallDenseMap<DomTreeNode *, int, 4> &DTCostMap) {
+static InstructionCost computeDomSubtreeCost(
+ DomTreeNode &N,
+ const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
+ SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
// Don't accumulate cost (or recurse through) blocks not in our block cost
// map and thus not part of the duplication cost being considered.
auto BBCostIt = BBCostMap.find(N.getBlock());
// If not, we have to compute it. We can't use insert above and update
// because computing the cost may insert more things into the map.
- int Cost = std::accumulate(
- N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
+ InstructionCost Cost = std::accumulate(
+ N.begin(), N.end(), BBCostIt->second,
+ [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
});
bool Inserted = DTCostMap.insert({&N, Cost}).second;
// subsets of the loop for duplication during unswitching.
SmallPtrSet<const Value *, 4> EphValues;
CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
- SmallDenseMap<BasicBlock *, int, 4> BBCostMap;
+ SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
// Compute the cost of each block, as well as the total loop cost. Also, bail
// out if we see instructions which are incompatible with loop unswitching
L.getHeader()->getParent()->hasMinSize()
? TargetTransformInfo::TCK_CodeSize
: TargetTransformInfo::TCK_SizeAndLatency;
- int LoopCost = 0;
+ InstructionCost LoopCost = 0;
for (auto *BB : L.blocks()) {
- int Cost = 0;
+ InstructionCost Cost = 0;
for (auto &I : *BB) {
if (EphValues.count(&I))
continue;
// This requires memoizing each dominator subtree to avoid redundant work.
//
// FIXME: Need to actually do the number of candidates part above.
- SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
+ SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
// Given a terminator which might be unswitched, computes the non-duplicated
// cost for that terminator.
- auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) {
+ auto ComputeUnswitchedCost = [&](Instruction &TI,
+ bool FullUnswitch) -> InstructionCost {
BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;
- int Cost = LoopCost;
+ InstructionCost Cost = 0;
for (BasicBlock *SuccBB : successors(&BB)) {
// Don't count successors more than once.
if (!Visited.insert(SuccBB).second)
llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
return PredBB == &BB || DT.dominates(SuccBB, PredBB);
})) {
- Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
- assert(Cost >= 0 &&
+ Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
+ assert(Cost <= LoopCost &&
"Non-duplicated cost should never exceed total loop cost!");
}
}
int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
assert(SuccessorsCount > 1 &&
"Cannot unswitch a condition without multiple distinct successors!");
- return Cost * (SuccessorsCount - 1);
+ return (LoopCost - Cost) * (SuccessorsCount - 1);
};
Instruction *BestUnswitchTI = nullptr;
- int BestUnswitchCost = 0;
+ InstructionCost BestUnswitchCost = 0;
ArrayRef<Value *> BestUnswitchInvariants;
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
Instruction &TI = *TerminatorAndInvariants.first;
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
- int CandidateCost = ComputeUnswitchedCost(
+ InstructionCost CandidateCost = ComputeUnswitchedCost(
TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
Invariants[0] == BI->getCondition()));
// Calculate cost multiplier which is a tool to limit potentially