From eecb8c5f06149baf970fa0943e9fb9a6afe00207 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 20 Mar 2023 13:42:56 -0700 Subject: [PATCH] [SampleProfile] Use LazyCallGraph instead of CallGraph The function order in some tests had to be changed because they relied on ordering of functions returned in an SCC which is consistent but unspecified. --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 52 +++++++++++----------- .../new-pm-thinlto-postlink-samplepgo-defaults.ll | 4 +- .../new-pm-thinlto-prelink-samplepgo-defaults.ll | 4 +- .../SampleProfile/profile-context-order.ll | 38 ++++++++-------- .../SampleProfile/profile-topdown-order.ll | 18 ++++---- 5 files changed, 58 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 3086cd6..ccccb37 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -35,9 +35,9 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" @@ -479,7 +479,7 @@ public: bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI, CallGraph *CG); + ProfileSummaryInfo *_PSI, LazyCallGraph &CG); protected: bool runOnFunction(Function &F, ModuleAnalysisManager *AM); @@ -520,8 +520,8 @@ protected: void promoteMergeNotInlinedContextSamples( MapVector NonInlinedCallSites, const Function &F); - std::vector buildFunctionOrder(Module &M, CallGraph *CG); - std::unique_ptr buildProfiledCallGraph(CallGraph &CG); + std::vector buildFunctionOrder(Module &M, LazyCallGraph &CG); + std::unique_ptr buildProfiledCallGraph(Module &M); void generateMDProfMetadata(Function &F); /// Map from function name to Function *. Used to find the function from @@ -1821,7 +1821,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { } std::unique_ptr -SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { +SampleProfileLoader::buildProfiledCallGraph(Module &M) { std::unique_ptr ProfiledCG; if (FunctionSamples::ProfileIsCS) ProfiledCG = std::make_unique(*ContextTracker); @@ -1831,18 +1831,17 @@ SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { // Add all functions into the profiled call graph even if they are not in // the profile. This makes sure functions missing from the profile still // gets a chance to be processed. - for (auto &Node : CG) { - const auto *F = Node.first; - if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) + for (Function &F : M) { + if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F)); + ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(F)); } return ProfiledCG; } std::vector -SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { +SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) { std::vector FunctionOrderList; FunctionOrderList.reserve(M.size()); @@ -1850,7 +1849,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { errs() << "WARNING: -use-profiled-call-graph ignored, should be used " "together with -sample-profile-top-down-load.\n"; - if (!ProfileTopDownLoad || CG == nullptr) { + if (!ProfileTopDownLoad) { if (ProfileMergeInlinee) { // Disable ProfileMergeInlinee if profile is not loaded in top down order, // because the profile for a function may be used for the profile @@ -1866,8 +1865,6 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { return FunctionOrderList; } - assert(&CG->getModule() == &M); - if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) { // Use profiled call edges to augment the top-down order. There are cases @@ -1918,7 +1915,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { // static call edges are not so important when they don't correspond to a // context in the profile. - std::unique_ptr ProfiledCG = buildProfiledCallGraph(*CG); + std::unique_ptr ProfiledCG = buildProfiledCallGraph(M); scc_iterator CGI = scc_begin(ProfiledCG.get()); while (!CGI.isAtEnd()) { auto Range = *CGI; @@ -1935,25 +1932,27 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { ++CGI; } } else { - scc_iterator CGI = scc_begin(CG); - while (!CGI.isAtEnd()) { - for (CallGraphNode *Node : *CGI) { - auto *F = Node->getFunction(); - if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) - FunctionOrderList.push_back(F); + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { + for (LazyCallGraph::SCC &C : RC) { + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) + FunctionOrderList.push_back(&F); + } } - ++CGI; } } + std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); + LLVM_DEBUG({ dbgs() << "Function processing order:\n"; - for (auto F : reverse(FunctionOrderList)) { + for (auto F : FunctionOrderList) { dbgs() << F->getName() << "\n"; } }); - std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); return FunctionOrderList; } @@ -2205,7 +2204,8 @@ void SampleProfileMatcher::detectProfileMismatch() { } bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI, CallGraph *CG) { + ProfileSummaryInfo *_PSI, + LazyCallGraph &CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); PSI = _PSI; @@ -2369,8 +2369,8 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); - CallGraph &CG = AM.getResult(M); - if (!SampleLoader.runOnModule(M, &AM, PSI, &CG)) + LazyCallGraph &CG = AM.getResult(M); + if (!SampleLoader.runOnModule(M, &AM, PSI, CG)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index a275cf1..a65af44 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -46,7 +46,7 @@ ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running analysis: CallGraphAnalysis +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis @@ -76,11 +76,11 @@ ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA +; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index ec4e123..de3643b 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -44,7 +44,7 @@ ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running analysis: CallGraphAnalysis +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass @@ -69,11 +69,11 @@ ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA +; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass diff --git a/llvm/test/Transforms/SampleProfile/profile-context-order.ll b/llvm/test/Transforms/SampleProfile/profile-context-order.ll index 0771345..db368bc 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-order.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-order.ll @@ -28,6 +28,25 @@ @factor = dso_local global i32 3, align 4, !dbg !0 @fp = dso_local global ptr null, align 8 +; INLINE: define dso_local i32 @_Z5funcAi +; INLINE-NOT: call i32 @_Z8funcLeafi +; NOINLINE: define dso_local i32 @_Z5funcAi +; NOINLINE: call i32 @_Z8funcLeafi +; ICALL-INLINE: define dso_local i32 @_Z5funcAi +; ICALL-INLINE: call i32 @_Z3foo +; INLINEB: define dso_local i32 @_Z5funcBi +; INLINEB-NOT: call i32 @_Z8funcLeafi +; NOINLINEB: define dso_local i32 @_Z5funcBi +; NOINLINEB: call i32 @_Z8funcLeafi +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %0 = load ptr, ptr @fp, align 8 + %call = call i32 %0(i32 8), !dbg !45 + %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 + ret i32 %call, !dbg !46 +} + define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { entry: store ptr @_Z3fibi, ptr @fp, align 8, !dbg !25 @@ -49,25 +68,6 @@ for.body: ; preds = %for.body, %entry br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 } -; INLINE: define dso_local i32 @_Z5funcAi -; INLINE-NOT: call i32 @_Z8funcLeafi -; NOINLINE: define dso_local i32 @_Z5funcAi -; NOINLINE: call i32 @_Z8funcLeafi -; ICALL-INLINE: define dso_local i32 @_Z5funcAi -; ICALL-INLINE: call i32 @_Z3foo -; INLINEB: define dso_local i32 @_Z5funcBi -; INLINEB-NOT: call i32 @_Z8funcLeafi -; NOINLINEB: define dso_local i32 @_Z5funcBi -; NOINLINEB: call i32 @_Z8funcLeafi -define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { -entry: - %add = add nsw i32 %x, 100000, !dbg !44 - %0 = load ptr, ptr @fp, align 8 - %call = call i32 %0(i32 8), !dbg !45 - %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 - ret i32 %call, !dbg !46 -} - ; INLINE: define dso_local i32 @_Z8funcLeafi ; NOINLINE: define dso_local i32 @_Z8funcLeafi ; ICALL-INLINE: define dso_local i32 @_Z8funcLeafi diff --git a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll index fa24937..f85ab24 100644 --- a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll +++ b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll @@ -19,6 +19,15 @@ @factor = dso_local global i32 3, align 4, !dbg !0 @fp = dso_local global ptr null, align 8 +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %0 = load ptr, ptr @fp, align 8 + %call = call i32 %0(i32 8), !dbg !45 + %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 + ret i32 %call, !dbg !46 +} + define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { entry: store ptr @_Z3fibi, ptr @fp, align 8, !dbg !25 @@ -40,15 +49,6 @@ for.body: ; preds = %for.body, %entry br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 } -define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { -entry: - %add = add nsw i32 %x, 100000, !dbg !44 - %0 = load ptr, ptr @fp, align 8 - %call = call i32 %0(i32 8), !dbg !45 - %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 - ret i32 %call, !dbg !46 -} - ; INLINE: define dso_local i32 @_Z8funcLeafi ; NOINLINE: define dso_local i32 @_Z8funcLeafi ; ICALL-INLINE: define dso_local i32 @_Z8funcLeafi -- 2.7.4