ThinLTOPhase Phase,
bool DebugLogging = false);
+ /// Construct the module pipeline that performs inlining as well as
+ /// the inlining-driven cleanups.
+ ModulePassManager buildInlinerPipeline(OptimizationLevel Level,
+ ThinLTOPhase Phase,
+ bool DebugLogging = false);
+
/// Construct the core LLVM module optimization pipeline.
///
/// This pipeline focuses on optimizing the execution speed of the IR. It
return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}
-ModulePassManager
-PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging) {
+ModulePassManager PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
+ ThinLTOPhase Phase,
+ bool DebugLogging) {
+ ModulePassManager MPM(DebugLogging);
+
+ // Now begin the main postorder CGSCC pipeline.
+ // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+ // manager and trying to emulate its precise behavior. Much of this doesn't
+ // make a lot of sense and we should revisit the core CGSCC structure.
+ CGSCCPassManager MainCGPipeline(DebugLogging);
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+
+ // Run the inliner first. The theory is that we are walking bottom-up and so
+ // the callees have already been fully optimized, and we want to inline them
+ // into the callers so that our optimizations can reflect that.
+ // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
+ // because it makes profile annotation in the backend inaccurate.
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ IP.HotCallSiteThreshold = 0;
+ MainCGPipeline.addPass(InlinerPass(IP));
+
+ if (AttributorRun & AttributorRunOption::CGSCC)
+ MainCGPipeline.addPass(AttributorCGSCCPass());
+
+ if (PTO.Coroutines)
+ MainCGPipeline.addPass(CoroSplitPass());
+
+ // Now deduce any function attributes based in the current code.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+ // When at O3 add argument promotion to the pass pipeline.
+ // FIXME: It isn't at all clear why this should be limited to O3.
+ if (Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(ArgumentPromotionPass());
+
+ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+ // there are no OpenMP runtime calls present in the module.
+ if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(OpenMPOptPass());
+
+ // Lastly, add the core function simplification pipeline nested inside the
+ // CGSCC walk.
+ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
+
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(MainCGPipeline, Level);
+
+ // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
+ // to detect when we devirtualize indirect calls and iterate the SCC passes
+ // in that case to try and catch knock-on inlining or function attrs
+ // opportunities. Then we add it to the module pipeline by walking the SCCs
+ // in postorder (or bottom-up).
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
+ std::move(MainCGPipeline), MaxDevirtIterations)));
+ return MPM;
+}
+
+ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
+ OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
ModulePassManager MPM(DebugLogging);
bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
// the inliner pass.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- // Now begin the main postorder CGSCC pipeline.
- // FIXME: The current CGSCC pipeline has its origins in the legacy pass
- // manager and trying to emulate its precise behavior. Much of this doesn't
- // make a lot of sense and we should revisit the core CGSCC structure.
- CGSCCPassManager MainCGPipeline(DebugLogging);
-
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
-
- // Run the inliner first. The theory is that we are walking bottom-up and so
- // the callees have already been fully optimized, and we want to inline them
- // into the callers so that our optimizations can reflect that.
- // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
- // because it makes profile annotation in the backend inaccurate.
- InlineParams IP = getInlineParamsFromOptLevel(Level);
- if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
- PGOOpt->Action == PGOOptions::SampleUse)
- IP.HotCallSiteThreshold = 0;
- MainCGPipeline.addPass(InlinerPass(IP));
-
- if (AttributorRun & AttributorRunOption::CGSCC)
- MainCGPipeline.addPass(AttributorCGSCCPass());
-
- if (PTO.Coroutines)
- MainCGPipeline.addPass(CoroSplitPass());
-
- // Now deduce any function attributes based in the current code.
- MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
-
- // When at O3 add argument promotion to the pass pipeline.
- // FIXME: It isn't at all clear why this should be limited to O3.
- if (Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(ArgumentPromotionPass());
-
- // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
- // there are no OpenMP runtime calls present in the module.
- if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(OpenMPOptPass());
-
- // Lastly, add the core function simplification pipeline nested inside the
- // CGSCC walk.
- MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
-
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(MainCGPipeline, Level);
-
- // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
- // to detect when we devirtualize indirect calls and iterate the SCC passes
- // in that case to try and catch knock-on inlining or function attrs
- // opportunities. Then we add it to the module pipeline by walking the SCCs
- // in postorder (or bottom-up).
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
- std::move(MainCGPipeline), MaxDevirtIterations)));
-
+ MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging));
return MPM;
}
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
- MPM.addPass(IPSCCPPass());
+ MPM.addPass(IPSCCPPass());
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
+ // Attach metadata to indirect call sites indicating the set of functions
+ // they may target at run-time. This should follow IPSCCP.
+ MPM.addPass(CalledValuePropagationPass());
}
// Now deduce any function attributes based in the current code.
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
+MODULE_PASS("scc-oz-module-inliner",
+ buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging))
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
+; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
+; CHECK-O-NEXT: Finished llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
; CHECK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
+; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
+; CHECK-O-NEXT: Finished llvm::Module pass manager run.
; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass
; CHECK-POSTLINK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
; CHECK-POSTLINK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running analysis: GlobalsAA
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
+; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
+; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
; CHECK-O-NEXT: Running analysis: GlobalsAA
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
+; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
+; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: GlobalsAA
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
+; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
+; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar
; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis on bar
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
; CHECK-O-NEXT: Running analysis: GlobalsAA
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
+; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
+; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Finished {{.*}}Module pass manager run.
; CHECK-O-NEXT: Running pass: NameAnonGlobalPass
--- /dev/null
+; modify_value will be inlined into main. With just the inliner pass, at most
+; some trivial DCE would happen, which in this case doesn't modify post-inlined
+; main much.
+; In contrast, with the full set of module inliner-related passes, at the end of
+; inlining (incl. function cleanups ran after inlining), main will be reduced to
+; a 'ret 10'
+;
+; RUN: opt -passes=inline -S < %s | FileCheck %s --check-prefix=INLINE --check-prefix=CHECK
+; RUN: opt -passes=scc-oz-module-inliner -S < %s | FileCheck %s --check-prefix=MODULE --check-prefix=CHECK
+
+define void @modify_value({i32, float}* %v) {
+ %f = getelementptr { i32, float }, { i32, float }* %v, i64 0, i32 0
+ store i32 10, i32* %f
+ ret void
+}
+
+define i32 @main() {
+ %my_val = alloca {i32, float}
+ call void @modify_value({i32, float}* %my_val)
+ %f = getelementptr { i32, float }, { i32, float }* %my_val, i64 0, i32 0
+ %ret = load i32, i32* %f
+ ret i32 %ret
+}
+
+; CHECK-LABEL: @main
+; INLINE-NEXT: %my_val = alloca
+; MODULE-NEXT: ret i32 10
\ No newline at end of file