From 4e9dd21015f2ed4d57d6bea08ca08a30ef6e5ce8 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Thu, 29 Sep 2022 09:00:38 -0700
Subject: [PATCH] [ModuleInliner] Add a cost-benefit-based priority

This patch teaches the module inliner a traversal order designed for
the instrumentation FDO (+ThinLTO) scenario.

The new traversal order prioritizes call sites in the following order:

1. Those call sites that are expected to reduce the caller size

2. Those call sites that have gone through the cost-benefit analaysis

3. The remaining call sites

With this fairly simple traversal order, a large internel benchmark
yields performance comparable to the bottom-up inliner -- both in
terms of the execution performance and .text* sizes.

Big thanks goes to Liqiang Tao for the module inliner infrastructure.

I still have hacks outside this patch to prevent excessively long
compilation or .text* size explosion.  I'm trying to come up with
acceptable solutions in near future.

Differential Revision: https://reviews.llvm.org/D134376
---
 llvm/lib/Analysis/InlineOrder.cpp | 86 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 81 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Analysis/InlineOrder.cpp b/llvm/lib/Analysis/InlineOrder.cpp
index 9532fb0..274096f 100644
--- a/llvm/lib/Analysis/InlineOrder.cpp
+++ b/llvm/lib/Analysis/InlineOrder.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
 
 #define DEBUG_TYPE "inline-order"
 
-enum class InlinePriorityMode : int { Size, Cost, OptRatio };
+enum class InlinePriorityMode : int { Size, Cost, CostBenefit };
 
 static cl::opt<InlinePriorityMode> UseInlinePriority(
     "inline-priority-mode", cl::init(InlinePriorityMode::Size), cl::Hidden,
@@ -30,7 +30,14 @@ static cl::opt<InlinePriorityMode> UseInlinePriority(
     cl::values(clEnumValN(InlinePriorityMode::Size, "size",
                           "Use callee size priority."),
                clEnumValN(InlinePriorityMode::Cost, "cost",
-                          "Use inline cost priority.")));
+                          "Use inline cost priority."),
+               clEnumValN(InlinePriorityMode::CostBenefit, "cost-benefit",
+                          "Use cost-benefit ratio.")));
+
+static cl::opt<int> ModuleInlinerTopPriorityThreshold(
+    "moudle-inliner-top-priority-threshold", cl::Hidden, cl::init(0),
+    cl::desc("The cost threshold for call sites that get inlined without the "
+             "cost-benefit analysis"));
 
 namespace {
 
@@ -100,6 +107,74 @@ private:
   int Cost;
 };
 
+class CostBenefitPriority {
+public:
+  CostBenefitPriority() = default;
+  CostBenefitPriority(const CallBase *CB, FunctionAnalysisManager &FAM,
+                      const InlineParams &Params) {
+    auto IC = getInlineCostWrapper(const_cast<CallBase &>(*CB), FAM, Params);
+    Cost = IC.getCost();
+    StaticBonusApplied = IC.getStaticBonusApplied();
+    CostBenefit = IC.getCostBenefit();
+  }
+
+  static bool isMoreDesirable(const CostBenefitPriority &P1,
+                              const CostBenefitPriority &P2) {
+    // We prioritize call sites in the dictionary order of the following
+    // priorities:
+    //
+    // 1. Those call sites that are expected to reduce the caller size when
+    //    inlined.  Within them, we prioritize those call sites with bigger
+    //    reduction.
+    //
+    // 2. Those call sites that have gone through the cost-benefit analysis.
+    //    Currently, they are limited to hot call sites.  Within them, we
+    //    prioritize those call sites with higher benefit-to-cost ratios.
+    //
+    // 3. Remaining call sites are prioritized according to their costs.
+
+    // We add back StaticBonusApplied to determine whether we expect the caller
+    // to shrink (even if we don't delete the callee).
+    bool P1ReducesCallerSize =
+        P1.Cost + P1.StaticBonusApplied < ModuleInlinerTopPriorityThreshold;
+    bool P2ReducesCallerSize =
+        P2.Cost + P2.StaticBonusApplied < ModuleInlinerTopPriorityThreshold;
+    if (P1ReducesCallerSize || P2ReducesCallerSize) {
+      // If one reduces the caller size while the other doesn't, then return
+      // true iff P1 reduces the caller size.
+      if (P1ReducesCallerSize != P2ReducesCallerSize)
+        return P1ReducesCallerSize;
+
+      // If they both reduce the caller size, pick the one with the smaller
+      // cost.
+      return P1.Cost < P2.Cost;
+    }
+
+    bool P1HasCB = P1.CostBenefit.has_value();
+    bool P2HasCB = P2.CostBenefit.has_value();
+    if (P1HasCB || P2HasCB) {
+      // If one has undergone the cost-benefit analysis while the other hasn't,
+      // then return true iff P1 has.
+      if (P1HasCB != P2HasCB)
+        return P1HasCB;
+
+      // If they have undergone the cost-benefit analysis, then pick the one
+      // with a higher benefit-to-cost ratio.
+      APInt LHS = P1.CostBenefit->getBenefit() * P2.CostBenefit->getCost();
+      APInt RHS = P2.CostBenefit->getBenefit() * P1.CostBenefit->getCost();
+      return LHS.ugt(RHS);
+    }
+
+    // Remaining call sites are ordered according to their costs.
+    return P1.Cost < P2.Cost;
+  }
+
+private:
+  int Cost;
+  int StaticBonusApplied;
+  Optional<CostBenefitPair> CostBenefit;
+};
+
 template <typename PriorityT>
 class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
   using T = std::pair<CallBase *, int>;
@@ -195,9 +270,10 @@ llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params) {
     LLVM_DEBUG(dbgs() << "    Current used priority: Cost priority ---- \n");
     return std::make_unique<PriorityInlineOrder<CostPriority>>(FAM, Params);
 
-  default:
-    llvm_unreachable("Unsupported Inline Priority Mode");
-    break;
+  case InlinePriorityMode::CostBenefit:
+    LLVM_DEBUG(
+        dbgs() << "    Current used priority: cost-benefit priority ---- \n");
+    return std::make_unique<PriorityInlineOrder<CostBenefitPriority>>(FAM, Params);
   }
   return nullptr;
 }
-- 
2.7.4