[Inliner,OptDiag] Add hotness attribute to opt diagnostics
authorAdam Nemet <anemet@apple.com>
Wed, 10 Aug 2016 00:44:44 +0000 (00:44 +0000)
committerAdam Nemet <anemet@apple.com>
Wed, 10 Aug 2016 00:44:44 +0000 (00:44 +0000)
Summary:
The inliner not being a function pass requires the work-around of
generating the OptimizationRemarkEmitter and in turn BFI on demand.
This will go away after the new PM is ready.

BFI is only computed inside ORE if the user has requested hotness
information for optimization diagnostitics (-pass-remark-with-hotness at
the 'opt' level).  Thus there is no additional overhead without the
flag.

Reviewers: hfinkel, davidxl, eraman

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D22694

llvm-svn: 278185

llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h
llvm/include/llvm/Transforms/IPO/InlinerPass.h
llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
llvm/lib/Transforms/IPO/Inliner.cpp
llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll [new file with mode: 0644]

index 981e5c2..0fa0cf0 100644 (file)
 #define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
-class BlockFrequencyInfo;
 class DebugLoc;
 class Function;
 class LLVMContext;
@@ -34,6 +34,19 @@ public:
   OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
       : F(F), BFI(BFI) {}
 
+  /// \brief This variant can be used to generate ORE on demand (without the
+  /// analysis pass).
+  ///
+  /// Note that this ctor has a very different cost depending on whether
+  /// F->getContext().getDiagnosticHotnessRequested() is on or not.  If it's off
+  /// the operation is free.
+  ///
+  /// Whereas if DiagnosticHotnessRequested is on, it is fairly expensive
+  /// operation since BFI and all its required analyses are computed.  This is
+  /// for example useful for CGSCC passes that can't use function analyses
+  /// passes in the old PM.
+  OptimizationRemarkEmitter(Function *F);
+
   OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
       : F(Arg.F), BFI(Arg.BFI) {}
 
@@ -149,6 +162,9 @@ private:
 
   BlockFrequencyInfo *BFI;
 
+  /// If we generate BFI on demand, we need to free it when ORE is freed.
+  std::unique_ptr<BlockFrequencyInfo> OwnedBFI;
+
   Optional<uint64_t> computeHotness(const Value *V);
 
   OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete;
index 8041037..de5f5d8 100644 (file)
@@ -27,6 +27,7 @@ class AssumptionCacheTracker;
 class CallSite;
 class DataLayout;
 class InlineCost;
+class OptimizationRemarkEmitter;
 class ProfileSummaryInfo;
 template <class PtrType, unsigned SmallSize> class SmallPtrSet;
 
index 5256437..e150d9d 100644 (file)
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/LLVMContext.h"
 
 using namespace llvm;
 
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+    : F(F), BFI(nullptr) {
+  if (!F->getContext().getDiagnosticHotnessRequested())
+    return;
+
+  // First create a dominator tree.
+  DominatorTree DT;
+  DT.recalculate(*F);
+
+  // Generate LoopInfo from it.
+  LoopInfo LI;
+  LI.analyze(DT);
+
+  // Then compute BranchProbabilityInfo.
+  BranchProbabilityInfo BPI;
+  BPI.calculate(*F, LI);
+
+  // Finally compute BFI.
+  OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+  BFI = OwnedBFI.get();
+}
+
 Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
   if (!BFI)
     return None;
index d676c09..c814b01 100644 (file)
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -237,11 +238,9 @@ static bool InlineCallIfPossible(
   return true;
 }
 
-static void emitAnalysis(CallSite CS, const Twine &Msg) {
-  Function *Caller = CS.getCaller();
-  LLVMContext &Ctx = Caller->getContext();
-  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
-  emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+static void emitAnalysis(CallSite CS, OptimizationRemarkEmitter &ORE,
+                         const Twine &Msg) {
+  ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CS.getInstruction(), Msg);
 }
 
 /// Return true if inlining of CS can block the caller from being
@@ -323,22 +322,23 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
 
 /// Return true if the inliner should attempt to inline at the given CallSite.
 static bool shouldInline(CallSite CS,
-                         function_ref<InlineCost(CallSite CS)> GetInlineCost) {
+                         function_ref<InlineCost(CallSite CS)> GetInlineCost,
+                         OptimizationRemarkEmitter &ORE) {
   InlineCost IC = GetInlineCost(CS);
 
   if (IC.isAlways()) {
     DEBUG(dbgs() << "    Inlining: cost=always"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
-                         " should always be inlined (cost=always)");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName()) +
+                              " should always be inlined (cost=always)");
     return true;
   }
 
   if (IC.isNever()) {
     DEBUG(dbgs() << "    NOT Inlining: cost=never"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " should never be inlined (cost=never)"));
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " should never be inlined (cost=never)"));
     return false;
   }
 
@@ -347,10 +347,10 @@ static bool shouldInline(CallSite CS,
     DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                  << ", thres=" << (IC.getCostDelta() + IC.getCost())
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " too costly to inline (cost=") +
-                         Twine(IC.getCost()) + ", threshold=" +
-                         Twine(IC.getCostDelta() + IC.getCost()) + ")");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " too costly to inline (cost=") +
+                              Twine(IC.getCost()) + ", threshold=" +
+                              Twine(IC.getCostDelta() + IC.getCost()) + ")");
     return false;
   }
 
@@ -359,20 +359,22 @@ static bool shouldInline(CallSite CS,
     DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                  << " Cost = " << IC.getCost()
                  << ", outer Cost = " << TotalSecondaryCost << '\n');
-    emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
-                           CS.getCalledFunction()->getName() +
-                           " increases the cost of inlining " +
-                           CS.getCaller()->getName() + " in other contexts"));
+    emitAnalysis(CS, ORE,
+                 Twine("Not inlining. Cost of inlining " +
+                       CS.getCalledFunction()->getName() +
+                       " increases the cost of inlining " +
+                       CS.getCaller()->getName() + " in other contexts"));
     return false;
   }
 
   DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
                << ", thres=" << (IC.getCostDelta() + IC.getCost())
                << ", Call: " << *CS.getInstruction() << '\n');
-  emitAnalysis(
-      CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
-              CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
-              " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
+  emitAnalysis(CS, ORE, CS.getCalledFunction()->getName() +
+                            Twine(" can be inlined into ") +
+                            CS.getCaller()->getName() + " with cost=" +
+                            Twine(IC.getCost()) + " (threshold=" +
+                            Twine(IC.getCostDelta() + IC.getCost()) + ")");
   return true;
 }
 
@@ -513,18 +515,21 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
             InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
           continue;
 
-        LLVMContext &CallerCtx = Caller->getContext();
-
         // Get DebugLoc to report. CS will be invalid after Inliner.
         DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+        BasicBlock *Block = CS.getParent();
+        // FIXME for new PM: because of the old PM we currently generate ORE and
+        // in turn BFI on demand.  With the new PM, the ORE dependency should
+        // just become a regular analysis dependency.
+        OptimizationRemarkEmitter ORE(Caller);
 
         // If the policy determines that we should inline this function,
         // try to do so.
-        if (!shouldInline(CS, GetInlineCost)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+        if (!shouldInline(CS, GetInlineCost, ORE)) {
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
 
@@ -532,17 +537,17 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
         if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                   InlineHistoryID, InsertLifetime, AARGetter,
                                   ImportedFunctionsStats)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
         ++NumInlined;
 
         // Report the inline decision.
-        emitOptimizationRemark(
-            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+        ORE.emitOptimizationRemark(
+            DEBUG_TYPE, DLoc, Block,
             Twine(Callee->getName() + " inlined into " + Caller->getName()));
 
         // If inlining this function gave us any new call sites, throw them
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
new file mode 100644 (file)
index 0000000..9611a2d
--- /dev/null
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN:     -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
+; RUN:     | FileCheck %s
+
+; CHECK: foo should always be inlined (cost=always) (hotness: 30)
+; CHECK: foo inlined into bar (hotness: 30)
+; CHECK: foz should never be inlined (cost=never) (hotness: 30)
+; CHECK: foz will not be inlined into bar (hotness: 30)
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @foo() #0 !prof !1 {
+entry:
+  ret i32 4
+}
+
+; Function Attrs: noinline nounwind uwtable
+define i32 @foz() #1 !prof !2 {
+entry:
+  ret i32 2
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar() !prof !3 {
+entry:
+  %call = call i32 @foo()
+  %call2 = call i32 @foz()
+  %mul = mul i32 %call, %call2
+  ret i32 %mul
+}
+
+attributes #0 = { alwaysinline }
+attributes #1 = { noinline }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.5.0 "}
+!1 = !{!"function_entry_count", i64 10}
+!2 = !{!"function_entry_count", i64 20}
+!3 = !{!"function_entry_count", i64 30}