From ce7f9cdb50a98cef5ee6e232e45e16c150c966e9 Mon Sep 17 00:00:00 2001 From: modimo Date: Mon, 25 Jan 2021 15:25:39 -0800 Subject: [PATCH] [InlineAdvisor] Allow replay of inline decisions for the CGSCC inliner from optimization remarks This change leverages the inline-replay work done for the SampleProfile inliner in D83743 so that it can also be used in the CGSCC inliner. NOTE: currently restricted to non-ML advisors only. The added switch `-cgscc-inline-replay=` replays the inlining decisions recorded in the given remarks file, which is generated via `-Rpass=inline`. The aim here is to make it easier to analyze changes that would modify inlining heuristics by separating their other effects from the inlining behavior. Doing so allows easier examination of assembly and runtime behavior compared to the baseline, rather than trying to dig through the large churn caused by inlining. In LTO compilation, since inlining is done twice, you can separately specify replay by passing the flag to the FE (`-cgscc-inline-replay=`) and to the linker (`-Wl,cgscc-inline-replay=`) with the remarks generated from their respective places. Testing on mysqld by comparing the inline decisions between base (generates remarks.txt) and diff (replay using identical input/tools with remarks.txt) and examining the inlining sites with `diff` shows 14,000 mismatches out of 247,341, for a ~94% replay accuracy. I believe this gap can be narrowed further, though for the general case we may never achieve full accuracy. For my personal use, this is close enough to be representative: I set the baseline as the one generated by the replay on identical input/toolset and compare that to my modified input/toolset using the same replay.
Testing: ninja check-llvm newly added test correctly replays CGSCC inlining decisions Reviewed By: mtrofin, wenlei Differential Revision: https://reviews.llvm.org/D94334 --- llvm/include/llvm/Analysis/InlineAdvisor.h | 3 +- llvm/include/llvm/Analysis/ReplayInlineAdvisor.h | 6 +- llvm/include/llvm/Transforms/IPO/Inliner.h | 3 +- llvm/lib/Analysis/InlineAdvisor.cpp | 12 ++- llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 12 +-- llvm/lib/Transforms/IPO/Inliner.cpp | 24 ++++- llvm/lib/Transforms/IPO/SampleProfile.cpp | 3 +- .../Inline/Inputs/cgscc-inline-replay.txt | 2 + llvm/test/Transforms/Inline/cgscc-inline-replay.ll | 119 +++++++++++++++++++++ 9 files changed, 167 insertions(+), 17 deletions(-) create mode 100644 llvm/test/Transforms/Inline/Inputs/cgscc-inline-replay.txt create mode 100644 llvm/test/Transforms/Inline/cgscc-inline-replay.ll diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index bd046d8..c39fae1 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -228,7 +228,8 @@ public: // InlineAdvisor must be preserved across analysis invalidations. 
return false; } - bool tryCreate(InlineParams Params, InliningAdvisorMode Mode); + bool tryCreate(InlineParams Params, InliningAdvisorMode Mode, + StringRef ReplayFile); InlineAdvisor *getAdvisor() const { return Advisor.get(); } void clear() { Advisor.reset(); } diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h index 9ef572f..3018bcc2 100644 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -25,13 +25,15 @@ class OptimizationRemarkEmitter; class ReplayInlineAdvisor : public InlineAdvisor { public: ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, - LLVMContext &Context, StringRef RemarksFile, - bool EmitRemarks); + LLVMContext &Context, + std::unique_ptr OriginalAdvisor, + StringRef RemarksFile, bool EmitRemarks); std::unique_ptr getAdviceImpl(CallBase &CB) override; bool areReplayRemarksLoaded() const { return HasReplayRemarks; } private: StringSet<> InlineSitesFromRemarks; + std::unique_ptr OriginalAdvisor; bool HasReplayRemarks = false; bool EmitRemarks = false; }; diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h index c5617ee..21ff869 100644 --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -14,6 +14,7 @@ #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/PassManager.h" #include @@ -105,7 +106,7 @@ public: private: InlineAdvisor &getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, FunctionAnalysisManager &FAM, Module &M); - std::unique_ptr OwnedDefaultAdvisor; + std::unique_ptr OwnedAdvisor; const bool OnlyMandatory; }; diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index 
0270848..9a2276a1 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -153,11 +154,19 @@ void InlineAdvice::recordInliningWithCalleeDeleted() { AnalysisKey InlineAdvisorAnalysis::Key; bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params, - InliningAdvisorMode Mode) { + InliningAdvisorMode Mode, + StringRef ReplayFile) { auto &FAM = MAM.getResult(M).getManager(); switch (Mode) { case InliningAdvisorMode::Default: Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params)); + // Restrict replay to default advisor, ML advisors are stateful so + // replay will need augmentations to interleave with them correctly. 
+ if (!ReplayFile.empty()) { + Advisor = std::make_unique( + M, FAM, M.getContext(), std::move(Advisor), ReplayFile, + /* EmitRemarks =*/true); + } break; case InliningAdvisorMode::Development: #ifdef LLVM_HAVE_TF_API @@ -174,6 +183,7 @@ bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params, #endif break; } + return !!Advisor; } diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp index d6595ba..b9dac2f 100644 --- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -22,12 +22,12 @@ using namespace llvm; #define DEBUG_TYPE "inline-replay" -ReplayInlineAdvisor::ReplayInlineAdvisor(Module &M, - FunctionAnalysisManager &FAM, - LLVMContext &Context, - StringRef RemarksFile, - bool EmitRemarks) - : InlineAdvisor(M, FAM), HasReplayRemarks(false), EmitRemarks(EmitRemarks) { +ReplayInlineAdvisor::ReplayInlineAdvisor( + Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, + std::unique_ptr OriginalAdvisor, StringRef RemarksFile, + bool EmitRemarks) + : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)), + HasReplayRemarks(false), EmitRemarks(EmitRemarks) { auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); std::error_code EC = BufferOrErr.getError(); if (EC) { diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index a7d7594..e91b6c9 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -92,6 +92,13 @@ static cl::opt extern cl::opt InlinerFunctionImportStats; +static cl::opt CGSCCInlineReplayFile( + "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"), + cl::desc( + "Optimization remarks file containing inline remarks to be replayed " + "by inlining from cgscc inline remarks."), + cl::Hidden); + LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) @@ -633,8 +640,8 @@ 
bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, InlineAdvisor & InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, FunctionAnalysisManager &FAM, Module &M) { - if (OwnedDefaultAdvisor) - return *OwnedDefaultAdvisor; + if (OwnedAdvisor) + return *OwnedAdvisor; auto *IAA = MAM.getCachedResult(M); if (!IAA) { @@ -646,9 +653,16 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, // duration of the inliner pass, and thus the lifetime of the owned advisor. // The one we would get from the MAM can be invalidated as a result of the // inliner's activity. - OwnedDefaultAdvisor = + OwnedAdvisor = std::make_unique(M, FAM, getInlineParams()); - return *OwnedDefaultAdvisor; + + if (!CGSCCInlineReplayFile.empty()) + OwnedAdvisor = std::make_unique( + M, FAM, M.getContext(), std::move(OwnedAdvisor), + CGSCCInlineReplayFile, + /*EmitRemarks=*/true); + + return *OwnedAdvisor; } assert(IAA->getAdvisor() && "Expected a present InlineAdvisorAnalysis also have an " @@ -998,7 +1012,7 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, ModuleAnalysisManager &MAM) { auto &IAA = MAM.getResult(M); - if (!IAA.tryCreate(Params, Mode)) { + if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) { M.getContext().emitError( "Could not setup Inlining Advisor for the requested " "mode and/or options"); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 73ad42f..264ac40 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1967,7 +1967,8 @@ bool SampleProfileLoader::doInitialization(Module &M, if (FAM && !ProfileInlineReplayFile.empty()) { ExternalInlineAdvisor = std::make_unique( - M, *FAM, Ctx, ProfileInlineReplayFile, /*EmitRemarks=*/false); + M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile, + /*EmitRemarks=*/false); if 
(!ExternalInlineAdvisor->areReplayRemarksLoaded()) ExternalInlineAdvisor.reset(); } diff --git a/llvm/test/Transforms/Inline/Inputs/cgscc-inline-replay.txt b/llvm/test/Transforms/Inline/Inputs/cgscc-inline-replay.txt new file mode 100644 index 0000000..3d6b588 --- /dev/null +++ b/llvm/test/Transforms/Inline/Inputs/cgscc-inline-replay.txt @@ -0,0 +1,2 @@ +remark: calls.cc:10:0: _Z3sumii inlined into main with (cost=45, threshold=337) at callsite main:3:0.1; +remark: calls.cc:4:0: _Z3subii inlined into main with (cost=-5, threshold=337) at callsite _Z3sumii:1:0 @ main:3:0.1; diff --git a/llvm/test/Transforms/Inline/cgscc-inline-replay.ll b/llvm/test/Transforms/Inline/cgscc-inline-replay.ll new file mode 100644 index 0000000..15846d4 --- /dev/null +++ b/llvm/test/Transforms/Inline/cgscc-inline-replay.ll @@ -0,0 +1,119 @@ +;; Note that this needs new pass manager for now. Passing `-cgscc-inline-replay` to legacy pass manager is a no-op. + +;; Check replay inline decisions +; RUN: opt < %s -passes=inline -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s +; RUN: opt < %s -passes=inline -cgscc-inline-replay=%S/Inputs/cgscc-inline-replay.txt -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8 + %add = add nsw i32 %tmp, %tmp1, !dbg !8 + %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8 + %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8 + ret i32 %add, !dbg !8 +} + +define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, 
align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !10 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10 + %add = sub nsw i32 %tmp, %tmp1, !dbg !10 + ret i32 %add, !dbg !11 +} + +define i32 @main() #0 !dbg !12 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !13 + br label %while.cond, !dbg !14 + +while.cond: ; preds = %if.end, %entry + %tmp = load i32, i32* %i, align 4, !dbg !15 + %inc = add nsw i32 %tmp, 1, !dbg !15 + store i32 %inc, i32* %i, align 4, !dbg !15 + %cmp = icmp slt i32 %tmp, 400000000, !dbg !15 + br i1 %cmp, label %while.body, label %while.end, !dbg !15 + +while.body: ; preds = %while.cond + %tmp1 = load i32, i32* %i, align 4, !dbg !17 + %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17 + br i1 %cmp1, label %if.then, label %if.else, !dbg !17 + +if.then: ; preds = %while.body + %tmp2 = load i32, i32* %i, align 4, !dbg !19 + %tmp3 = load i32, i32* %s, align 4, !dbg !19 + %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19 + store i32 %call, i32* %s, align 4, !dbg !19 + br label %if.end, !dbg !19 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !21 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !23 + +while.end: ; preds = %while.cond + %tmp4 = load i32, i32* %s, align 4, !dbg !25 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25 + ret i32 0, !dbg !26 +} + +declare i32 @printf(i8*, ...) 
+ +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 1, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.5 "} +!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 4, scope: !6) +!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = !DILocation(line: 20, scope: !9) +!11 = !DILocation(line: 21, scope: !9) +!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 8, scope: !12) +!14 = !DILocation(line: 9, scope: !12) +!15 = !DILocation(line: 9, scope: !16) +!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2) +!17 = !DILocation(line: 10, scope: !18) +!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10) +!19 = !DILocation(line: 10, scope: !20) +!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2) +!21 = !DILocation(line: 10, scope: !22) +!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4) +!23 = !DILocation(line: 10, scope: !24) +!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) +!25 = !DILocation(line: 11, scope: !12) +!26 = !DILocation(line: 
12, scope: !12) + +; DEFAULT: _Z3subii inlined into _Z3sumii +; DEFAULT: _Z3sumii inlined into main +; DEFAULT-NOT: _Z3subii inlined into main + +; REPLAY: _Z3sumii inlined into main +; REPLAY: _Z3subii inlined into main +; REPLAY-NOT: _Z3subii inlined into _Z3sumii -- 2.7.4