NFC: Migrate SimpleLoopUnswitch to work on InstructionCost

author Sander de Smalen <sander.desmalen@arm.com>

Tue, 2 Feb 2021 12:28:20 +0000 (12:28 +0000)

committer Sander de Smalen <sander.desmalen@arm.com>

Tue, 2 Feb 2021 13:32:44 +0000 (13:32 +0000)
author Sander de Smalen <sander.desmalen@arm.com>
Tue, 2 Feb 2021 12:28:20 +0000 (12:28 +0000)
committer Sander de Smalen <sander.desmalen@arm.com>
Tue, 2 Feb 2021 13:32:44 +0000 (13:32 +0000)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

index 9d3c8d0..49196e8 100644 (file)
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -2399,10 +2399,10 @@ static void unswitchNontrivialInvariants(
  /// The recursive computation is memozied into the provided DT-indexed cost map
  /// to allow querying it for most nodes in the domtree without it becoming
  /// quadratic.
-static int
-computeDomSubtreeCost(DomTreeNode &N,
-                      const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
-                      SmallDenseMap<DomTreeNode *, int, 4> &DTCostMap) {
+static InstructionCost computeDomSubtreeCost(
+    DomTreeNode &N,
+    const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
+    SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
    // Don't accumulate cost (or recurse through) blocks not in our block cost
    // map and thus not part of the duplication cost being considered.
    auto BBCostIt = BBCostMap.find(N.getBlock());
@@ -2416,8 +2416,9 @@ computeDomSubtreeCost(DomTreeNode &N,
  
    // If not, we have to compute it. We can't use insert above and update
    // because computing the cost may insert more things into the map.
-  int Cost = std::accumulate(
-      N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
+  InstructionCost Cost = std::accumulate(
+      N.begin(), N.end(), BBCostIt->second,
+      [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
          return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
        });
    bool Inserted = DTCostMap.insert({&N, Cost}).second;
@@ -2699,7 +2700,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
    // subsets of the loop for duplication during unswitching.
    SmallPtrSet<const Value *, 4> EphValues;
    CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
-  SmallDenseMap<BasicBlock *, int, 4> BBCostMap;
+  SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
  
    // Compute the cost of each block, as well as the total loop cost. Also, bail
    // out if we see instructions which are incompatible with loop unswitching
@@ -2710,9 +2711,9 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
        L.getHeader()->getParent()->hasMinSize()
        ? TargetTransformInfo::TCK_CodeSize
        : TargetTransformInfo::TCK_SizeAndLatency;
-  int LoopCost = 0;
+  InstructionCost LoopCost = 0;
    for (auto *BB : L.blocks()) {
-    int Cost = 0;
+    InstructionCost Cost = 0;
      for (auto &I : *BB) {
        if (EphValues.count(&I))
          continue;
@@ -2746,14 +2747,15 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
    // This requires memoizing each dominator subtree to avoid redundant work.
    //
    // FIXME: Need to actually do the number of candidates part above.
-  SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
+  SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
    // Given a terminator which might be unswitched, computes the non-duplicated
    // cost for that terminator.
-  auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) {
+  auto ComputeUnswitchedCost = [&](Instruction &TI,
+                                   bool FullUnswitch) -> InstructionCost {
      BasicBlock &BB = *TI.getParent();
      SmallPtrSet<BasicBlock *, 4> Visited;
  
-    int Cost = LoopCost;
+    InstructionCost Cost = 0;
      for (BasicBlock *SuccBB : successors(&BB)) {
        // Don't count successors more than once.
        if (!Visited.insert(SuccBB).second)
@@ -2787,8 +2789,8 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
            llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
              return PredBB == &BB || DT.dominates(SuccBB, PredBB);
            })) {
-        Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
-        assert(Cost >= 0 &&
+        Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
+        assert(Cost <= LoopCost &&
                 "Non-duplicated cost should never exceed total loop cost!");
        }
      }
@@ -2801,16 +2803,16 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
      int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
      assert(SuccessorsCount > 1 &&
             "Cannot unswitch a condition without multiple distinct successors!");
-    return Cost * (SuccessorsCount - 1);
+    return (LoopCost - Cost) * (SuccessorsCount - 1);
    };
    Instruction *BestUnswitchTI = nullptr;
-  int BestUnswitchCost = 0;
+  InstructionCost BestUnswitchCost = 0;
    ArrayRef<Value *> BestUnswitchInvariants;
    for (auto &TerminatorAndInvariants : UnswitchCandidates) {
      Instruction &TI = *TerminatorAndInvariants.first;
      ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
      BranchInst *BI = dyn_cast<BranchInst>(&TI);
-    int CandidateCost = ComputeUnswitchedCost(
+    InstructionCost CandidateCost = ComputeUnswitchedCost(
          TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
                                       Invariants[0] == BI->getCondition()));
      // Calculate cost multiplier which is a tool to limit potentially
author	Sander de Smalen <sander.desmalen@arm.com>
	Tue, 2 Feb 2021 12:28:20 +0000 (12:28 +0000)
committer	Sander de Smalen <sander.desmalen@arm.com>
	Tue, 2 Feb 2021 13:32:44 +0000 (13:32 +0000)