From 67fc52f06743a1437c829577b721c061cb004090 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 17 Aug 2016 02:56:20 +0000 Subject: [PATCH] [PM] Port the always inliner to the new pass manager in a much more minimal and boring form than the old pass manager's version. This pass does the very minimal amount of work necessary to inline functions declared as always-inline. It doesn't support a wide array of things that the legacy pass manager did support, but is alse ... about 20 lines of code. So it has that going for it. Notably things this doesn't support: - Array alloca merging - To support the above, bottom-up inlining with careful history tracking and call graph updates - DCE of the functions that become dead after this inlining. - Inlining through call instructions with the always_inline attribute. Instead, it focuses on inlining functions with that attribute. The first I've omitted because I'm hoping to just turn it off for the primary pass manager. If that doesn't pan out, I can add it here but it will be reasonably expensive to do so. The second should really be handled by running global-dce after the inliner. I don't want to re-implement the non-trivial logic necessary to do comdat-correct DCE of functions. This means the -O0 pipeline will have to be at least 'always-inline,global-dce', but that seems reasonable to me. If others are seriously worried about this I'd like to hear about it and understand why. Again, this is all solveable by factoring that logic into a utility and calling it here, but I'd like to wait to do that until there is a clear reason why the existing pass-based factoring won't work. The final point is a serious one. I can fairly easily add support for this, but it seems both costly and a confusing construct for the use case of the always inliner running at -O0. This attribute can of course still impact the normal inliner easily (although I find that a questionable re-use of the same attribute). I've started a discussion to sort out what semantics we want here and based on that can figure out if it makes sense ta have this complexity at O0 or not. One other advantage of this design is that it should be quite a bit faster due to checking for whether the function is a viable candidate for inlining exactly once per function instead of doing it for each call site. Anyways, hopefully a reasonable starting point for this pass. Differential Revision: https://reviews.llvm.org/D23299 llvm-svn: 278896 --- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/LinkAllPasses.h | 3 +- llvm/include/llvm/Transforms/IPO.h | 6 --- llvm/include/llvm/Transforms/IPO/AlwaysInliner.h | 40 +++++++++++++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- .../IPO/{InlineAlways.cpp => AlwaysInliner.cpp} | 57 ++++++++++++++++------ llvm/lib/Transforms/IPO/CMakeLists.txt | 2 +- llvm/lib/Transforms/IPO/IPO.cpp | 5 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 2 +- llvm/test/Transforms/Inline/always-inline.ll | 29 ++++++++--- llvm/tools/bugpoint/bugpoint.cpp | 3 +- llvm/tools/opt/opt.cpp | 3 +- 14 files changed, 120 insertions(+), 37 deletions(-) create mode 100644 llvm/include/llvm/Transforms/IPO/AlwaysInliner.h rename llvm/lib/Transforms/IPO/{InlineAlways.cpp => AlwaysInliner.cpp} (62%) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 2858723..a411273 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -66,7 +66,7 @@ void initializeAddressSanitizerModulePass(PassRegistry&); void initializeAddressSanitizerPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); -void initializeAlwaysInlinerPass(PassRegistry&); +void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry &); void initializeAtomicExpandPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index b2721d0..16c5f5c 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -39,6 +39,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/ObjCARC.h" @@ -97,7 +98,7 @@ namespace { (void) llvm::createInstrProfilingLegacyPass(); (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); - (void) llvm::createAlwaysInlinerPass(); + (void) llvm::createAlwaysInlinerLegacyPass(); (void) llvm::createGlobalDCEPass(); (void) llvm::createGlobalOptimizerPass(); (void) llvm::createGlobalsAAWrapperPass(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 3fe7115..9ef3881 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -107,12 +107,6 @@ Pass *createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel); Pass *createFunctionInliningPass(InlineParams &Params); //===----------------------------------------------------------------------===// -/// createAlwaysInlinerPass - Return a new pass object that inlines only -/// functions that are marked as "always_inline". -Pass *createAlwaysInlinerPass(); -Pass *createAlwaysInlinerPass(bool InsertLifetime); - -//===----------------------------------------------------------------------===// /// createPruneEHPass - Return a new pass object which transforms invoke /// instructions into calls, if the callee can _not_ unwind the stack. /// diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h new file mode 100644 index 0000000..15c8035 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h @@ -0,0 +1,40 @@ +//===-- AlwaysInliner.h - Pass to inline "always_inline" functions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides passes to inlining "always_inline" functions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H +#define LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Inlines functions marked as "always_inline". +/// +/// Note that this does not inline call sites marked as always_inline and does +/// not delete the functions even when all users are inlined. The normal +/// inliner should be used to handle call site inlining, this pass's goal is to +/// be the simplest possible pass to remove always_inline function definitions' +/// uses by inlining them. The \c GlobalDCE pass can be used to remove these +/// functions once all users are gone. +struct AlwaysInlinerPass : PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); +}; + +/// Create a legacy pass manager instance of a pass to inline and remove +/// functions marked as "always_inline". +Pass *createAlwaysInlinerLegacyPass(bool InsertLifetime = true); + +} + +#endif // LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 24d38a1..cdbc2a2 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -59,6 +59,7 @@ #include "llvm/Support/Regex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/GCOVProfiler.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 32dd570..2b45345 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -38,6 +38,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif +MODULE_PASS("always-inline", AlwaysInlinerPass()) MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 6e2455a..f782ea3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Vectorize.h" @@ -360,7 +361,7 @@ void AMDGPUPassConfig::addIRPasses() { // Function calls are not supported, so make sure we inline everything. addPass(createAMDGPUAlwaysInlinePass()); - addPass(createAlwaysInlinerPass()); + addPass(createAlwaysInlinerLegacyPass()); // We need to add the barrier noop pass, otherwise adding the function // inlining pass will cause all of the PassConfigs passes to be run // one function at a time, which means if we have a nodule with two diff --git a/llvm/lib/Transforms/IPO/InlineAlways.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp similarity index 62% rename from llvm/lib/Transforms/IPO/InlineAlways.cpp rename to llvm/lib/Transforms/IPO/AlwaysInliner.cpp index ddb7c87..de059b6 100644 --- a/llvm/lib/Transforms/IPO/InlineAlways.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -12,7 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" @@ -25,25 +26,51 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Transforms/Utils/Cloning.h" using namespace llvm; #define DEBUG_TYPE "inline" +PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) { + InlineFunctionInfo IFI; + SmallSetVector Calls; + bool Changed = false; + for (Function &F : M) + if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) && + isInlineViable(F)) { + Calls.clear(); + + for (User *U : F.users()) + if (auto CS = CallSite(U)) + if (CS.getCalledFunction() == &F) + Calls.insert(CS); + + for (CallSite CS : Calls) + // FIXME: We really shouldn't be able to fail to inline at this point! + // We should do something to log or check the inline failures here. + Changed |= InlineFunction(CS, IFI); + } + + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + namespace { -/// \brief Inliner pass which only handles "always inline" functions. -class AlwaysInliner : public Inliner { +/// Inliner pass which only handles "always inline" functions. +/// +/// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner +/// base class to provide several facilities such as array alloca merging. +class AlwaysInlinerLegacyPass : public Inliner { public: - AlwaysInliner() : Inliner(ID, /*InsertLifetime*/ true) { - initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); + AlwaysInlinerLegacyPass() : Inliner(ID, /*InsertLifetime*/ true) { + initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); } - AlwaysInliner(bool InsertLifetime) : Inliner(ID, InsertLifetime) { - initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); + AlwaysInlinerLegacyPass(bool InsertLifetime) : Inliner(ID, InsertLifetime) { + initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); } /// Main run interface method. We override here to avoid calling skipSCC(). @@ -60,20 +87,18 @@ public: }; } -char AlwaysInliner::ID = 0; -INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", +char AlwaysInlinerLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline", "Inliner for always_inline functions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(AlwaysInliner, "always-inline", +INITIALIZE_PASS_END(AlwaysInlinerLegacyPass, "always-inline", "Inliner for always_inline functions", false, false) -Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); } - -Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { - return new AlwaysInliner(InsertLifetime); +Pass *llvm::createAlwaysInlinerLegacyPass(bool InsertLifetime) { + return new AlwaysInlinerLegacyPass(InsertLifetime); } /// \brief Get the inline cost for the always-inliner. @@ -88,7 +113,7 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { /// computed here, but as we only expect to do this for relatively few and /// small functions which have the explicit attribute to force inlining, it is /// likely not worth it in practice. -InlineCost AlwaysInliner::getInlineCost(CallSite CS) { +InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) { Function *Callee = CS.getCalledFunction(); // Only inline direct calls to functions with always-inline attributes diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index d6782c7..341ce55 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMipo + AlwaysInliner.cpp ArgumentPromotion.cpp BarrierNoopPass.cpp ConstantMerge.cpp @@ -14,7 +15,6 @@ add_llvm_library(LLVMipo IPConstantPropagation.cpp IPO.cpp InferFunctionAttrs.cpp - InlineAlways.cpp InlineSimple.cpp Inliner.cpp Internalize.cpp diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index 181875e..58b89b2 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -18,6 +18,7 @@ #include "llvm/InitializePasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" using namespace llvm; @@ -32,7 +33,7 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeGlobalDCELegacyPassPass(Registry); initializeGlobalOptLegacyPassPass(Registry); initializeIPCPPass(Registry); - initializeAlwaysInlinerPass(Registry); + initializeAlwaysInlinerLegacyPassPass(Registry); initializeSimpleInlinerPass(Registry); initializeInferFunctionAttrsLegacyPassPass(Registry); initializeInternalizeLegacyPassPass(Registry); @@ -82,7 +83,7 @@ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { } void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(llvm::createAlwaysInlinerPass()); + unwrap(PM)->add(llvm::createAlwaysInlinerLegacyPass()); } void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) { diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index da04c68..7573467 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1053,7 +1053,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { - if (!PreserveAlignmentAssumptions) + if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) return; AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*CS.getCaller()) diff --git a/llvm/test/Transforms/Inline/always-inline.ll b/llvm/test/Transforms/Inline/always-inline.ll index 5ad1bde..0378121 100644 --- a/llvm/test/Transforms/Inline/always-inline.ll +++ b/llvm/test/Transforms/Inline/always-inline.ll @@ -1,8 +1,12 @@ -; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s +; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL ; ; Ensure the threshold has no impact on these decisions. -; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s -; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s +; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL +; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL +; +; The new pass manager doesn't re-use any threshold based infrastructure for +; the always inliner, but test that we get the correct result. +; RUN: opt < %s -passes=always-inline -S | FileCheck %s --check-prefix=CHECK define i32 @inner1() alwaysinline { ret i32 1 @@ -126,10 +130,23 @@ define i32 @inner7() { ret i32 1 } define i32 @outer7() { -; CHECK-LABEL: @outer7( -; CHECK-NOT: call -; CHECK: ret +; CHECK-CALL-LABEL: @outer7( +; CHECK-CALL-NOT: call +; CHECK-CALL: ret %r = call i32 @inner7() alwaysinline ret i32 %r } + +define float* @inner8(float* nocapture align 128 %a) alwaysinline { + ret float* %a +} +define float @outer8(float* nocapture %a) { +; CHECK-LABEL: @outer8( +; CHECK-NOT: call float* @inner8 +; CHECK: ret + + %inner_a = call float* @inner8(float* %a) + %f = load float, float* %inner_a, align 4 + ret float %f +} diff --git a/llvm/tools/bugpoint/bugpoint.cpp b/llvm/tools/bugpoint/bugpoint.cpp index 2c9fdaf..25c0e41 100644 --- a/llvm/tools/bugpoint/bugpoint.cpp +++ b/llvm/tools/bugpoint/bugpoint.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Valgrind.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" //Enable this macro to debug bugpoint itself. @@ -179,7 +180,7 @@ int main(int argc, char **argv) { if (OptLevelO1 || OptLevelO2 || OptLevelO3) { PassManagerBuilder Builder; if (OptLevelO1) - Builder.Inliner = createAlwaysInlinerPass(); + Builder.Inliner = createAlwaysInlinerLegacyPass(); else if (OptLevelOs || OptLevelO2) Builder.Inliner = createFunctionInliningPass(2, OptLevelOs ? 1 : 0); else diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 4aa21db..0297682 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Coroutines.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" #include @@ -259,7 +260,7 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM, } else if (OptLevel > 1) { Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel); } else { - Builder.Inliner = createAlwaysInlinerPass(); + Builder.Inliner = createAlwaysInlinerLegacyPass(); } Builder.DisableUnitAtATime = !UnitAtATime; Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ? -- 2.7.4