From: Rong Xu Date: Tue, 26 Feb 2019 22:37:46 +0000 (+0000) Subject: [PGO] Context sensitive PGO (part 1) X-Git-Tag: llvmorg-9.0.0-rc1~11195 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=35d2d51369b4ddc03893c10082a47cab4a2185e5;p=platform%2Fupstream%2Fllvm.git [PGO] Context sensitive PGO (part 1) Current PGO profile counts are not context sensitive. The branch probabilities for the inlined functions are kept the same for all call-sites, and they might be very different from the actual branch probabilities. These suboptimal profiles can greatly affect some downstream optimizations, in particular for the machine basic block placement optimization. In this patch, we propose to have a post-inline PGO instrumentation/use pass, which we call Context Sensitive PGO (CSPGO). For the users who want the best possible performance, they can perform a second round of PGO instrument/use on the top of the regular PGO. They will have two sets of profile counts. The first pass profile will be mainly for inline, indirect-call promotion, and CGSCC simplification pass optimizations. The second pass profile is for post-inline optimizations and code-gen optimizations. A typical usage: // Regular PGO instrumentation and generate pass1 profile. > clang -O2 -fprofile-generate source.c -o gen > ./gen > llvm-profdata merge default.*profraw -o pass1.profdata // CSPGO instrumentation. > clang -O2 -fprofile-use=pass1.profdata -fcs-profile-generate -o gen2 > ./gen2 // Merge two sets of profiles > llvm-profdata merge default.*profraw pass1.profdata -o profile.profdata // Use the combined profile. Pass manager will invoke two PGO use passes. > clang -O2 -fprofile-use=profile.profdata -o use This change touches many components in the compiler. The reviewed patch (D54175) will be committed in phases. 
Differential Revision: https://reviews.llvm.org/D54175 llvm-svn: 354930 --- diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index acc7065de98b..9b549b07044f 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -299,6 +299,7 @@ void initializePEIPass(PassRegistry&); void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&); void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&); void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&); +void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&); void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&); void initializePHIEliminationPass(PassRegistry&); void initializePartialInlinerLegacyPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index f0e1b1d16490..bf8cc98ac4da 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -55,6 +55,9 @@ struct Config { /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; + /// Run PGO context sensitive IR instrumentation. + bool RunCSIRInstr = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. @@ -73,6 +76,9 @@ struct Config { /// with this triple. std::string DefaultTriple; + /// Context Sensitive PGO profile path. + std::string CSIRProfile; + /// Sample PGO profile path. 
std::string SampleProfile; diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index ec81b1618cc5..fb3b19e2dab9 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -102,6 +102,7 @@ namespace { (void) llvm::createGCOVProfilerPass(); (void) llvm::createPGOInstrumentationGenLegacyPass(); (void) llvm::createPGOInstrumentationUseLegacyPass(); + (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass(); (void) llvm::createPGOIndirectCallPromotionLegacyPass(); (void) llvm::createPGOMemOPSizeOptLegacyPass(); (void) llvm::createInstrProfilingLegacyPass(); diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 9ea1b9bd2fe3..1d55a8ea2c64 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -767,10 +767,20 @@ struct NamedInstrProfRecord : InstrProfRecord { StringRef Name; uint64_t Hash; + // We reserve this bit as the flag for context sensitive profile record. + static const int CS_FLAG_IN_FUNC_HASH = 60; + NamedInstrProfRecord() = default; NamedInstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + + static bool hasCSFlagInHash(uint64_t FuncHash) { + return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); + } + static void setCSFlagInHash(uint64_t &FuncHash) { + FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); + } }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -1004,6 +1014,8 @@ namespace RawInstrProf { // from control data struct is changed from raw pointer to Name's MD5 value. // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the // raw header. +// Version 5: Bit 60 of FuncHash is reserved for the flag for the context +// sensitive records. 
const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); @@ -1040,6 +1052,10 @@ struct Header { void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +void createIRLevelProfileFlagVar(Module &M, bool IsCS); + // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index e1e2df55699a..19d465129258 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -635,10 +635,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, * version for other variants of profile. We set the lowest bit of the upper 8 * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentaiton * generated profile, and 0 if this is a Clang FE generated profile. + * 1 in bit 57 indicates there are context-sensitive records in the profile. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) +#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index ff0ebf75245b..9eea552b238d 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -87,10 +87,14 @@ struct GCOVOptions { ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()); -// PGO Instrumention -ModulePass *createPGOInstrumentationGenLegacyPass(); +// PGO Instrumention. 
Parameter IsCS indicates if this is the context senstive +// instrumentation. +ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false); ModulePass * -createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef("")); +createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""), + bool IsCS = false); +ModulePass *createPGOInstrumentationGenCreateVarLegacyPass( + StringRef CSInstrName = StringRef("")); ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false); FunctionPass *createPGOMemOPSizeOptLegacyPass(); @@ -132,15 +136,19 @@ struct InstrProfOptions { // Use atomic profile counter increments. bool Atomic = false; + // Use BFI to guide register promotion + bool UseBFIInPromotion = false; + // Name of the profile file to use as output std::string InstrProfileOutput; InstrProfOptions() = default; }; -/// Insert frontend instrumentation based profiling. +/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if +// this is the context senstive instrumentation. 
ModulePass *createInstrProfilingLegacyPass( - const InstrProfOptions &Options = InstrProfOptions()); + const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false); FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false, bool Recover = false); diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 4fad6506810b..4428be7e4d48 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -35,7 +35,8 @@ using LoadStorePair = std::pair; class InstrProfiling : public PassInfoMixin { public: InstrProfiling() = default; - InstrProfiling(const InstrProfOptions &Options) : Options(Options) {} + InstrProfiling(const InstrProfOptions &Options, bool IsCS) + : Options(Options), IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); bool run(Module &M, const TargetLibraryInfo &TLI); @@ -60,6 +61,9 @@ private: GlobalVariable *NamesVar; size_t NamesSize; + // Is this lowering for the context-sensitive instrumentation. + bool IsCS; + // vector of counter load/store pairs to be register promoted. std::vector PromotionCandidates; diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index 4f22e28232e7..e59d1ba64cfe 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/InstrProf.h" #include #include @@ -26,23 +27,51 @@ class Function; class Instruction; class Module; +/// The instrumentation (profile-instr-gen) pass for IR based PGO. +// We use this pass to create COMDAT profile variables for context +// sensitive PGO (CSPGO). 
The reason to have a pass for this is CSPGO +// can be run after LTO/ThinLTO linking. Lld linker needs to see +// all the COMDAT variables before linking. So we have this pass +// always run before linking for CSPGO. +class PGOInstrumentationGenCreateVar + : public PassInfoMixin { +public: + PGOInstrumentationGenCreateVar(std::string CSInstrName = "") + : CSInstrName(CSInstrName) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { + createProfileFileNameVar(M, CSInstrName); + createIRLevelProfileFlagVar(M, /* IsCS */ true); + return PreservedAnalyses::all(); + } + +private: + std::string CSInstrName; +}; + /// The instrumentation (profile-instr-gen) pass for IR based PGO. class PGOInstrumentationGen : public PassInfoMixin { public: + PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The profile annotation (profile-instr-use) pass for IR based PGO. class PGOInstrumentationUse : public PassInfoMixin { public: PGOInstrumentationUse(std::string Filename = "", - std::string RemappingFilename = ""); + std::string RemappingFilename = "", bool IsCS = false); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::string ProfileFileName; std::string ProfileRemappingFileName; + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The indirect function call promotion pass. 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 9d5f5aa4b785..cfc436183eb3 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -569,7 +569,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, if (!ProfileGenFile.empty()) Options.InstrProfileOutput = ProfileGenFile; Options.DoCounterPromotion = true; - MPM.addPass(InstrProfiling(Options)); + Options.UseBFIInPromotion = false; + MPM.addPass(InstrProfiling(Options, false)); } if (!ProfileUseFile.empty()) diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 8a2ff7769f16..ccb651a22b30 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1011,6 +1011,25 @@ void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, assert(RangeLast >= RangeStart); } +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +void createIRLevelProfileFlagVar(Module &M, bool IsCS) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + if (IsCS) + ProfileVersion |= VARIANT_MASK_CSIR_PROF; + auto IRLevelVersionVariable = new GlobalVariable( + M, IntTy64, true, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); + IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); + IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); + } +} + // Create the variable for the profile file name. 
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { if (InstrProfileOutput.empty()) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index e77427aa91dd..9c1dc5c8e86c 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -18,6 +18,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" @@ -147,8 +149,8 @@ public: static char ID; InstrProfilingLegacyPass() : ModulePass(ID) {} - InstrProfilingLegacyPass(const InstrProfOptions &Options) - : ModulePass(ID), InstrProf(Options) {} + InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS) + : ModulePass(ID), InstrProf(Options, IsCS) {} StringRef getPassName() const override { return "Frontend instrumentation-based coverage lowering"; @@ -232,9 +234,9 @@ class PGOCounterPromoter { public: PGOCounterPromoter( DenseMap> &LoopToCands, - Loop &CurLoop, LoopInfo &LI) + Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), - LI(LI) { + LI(LI), BFI(BFI) { SmallVector LoopExitBlocks; SmallPtrSet BlockSet; @@ -263,6 +265,20 @@ public: SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + // If BFI is set, we will use it to guide the promotions. + if (BFI) { + auto *BB = Cand.first->getParent(); + auto InstrCount = BFI->getBlockProfileCount(BB); + if (!InstrCount) + continue; + auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); + // If the average loop trip count is not greater than 1.5, we skip + // promotion. 
+ if (PreheaderCount && + (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2)) + continue; + } + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(), ExitBlocks, InsertPts, LoopToCandidates, LI); @@ -312,6 +328,11 @@ private: SmallVector ExitingBlocks; LP->getExitingBlocks(ExitingBlocks); + + // If BFI is set, we do more aggressive promotions based on BFI. + if (BFI) + return (unsigned)-1; + // Not considierered speculative. if (ExitingBlocks.size() == 1) return MaxNumOfPromotionsPerLoop; @@ -343,6 +364,7 @@ private: SmallVector InsertPts; Loop &L; LoopInfo &LI; + BlockFrequencyInfo *BFI; }; } // end anonymous namespace @@ -365,8 +387,9 @@ INITIALIZE_PASS_END( "Frontend instrumentation-based coverage lowering.", false, false) ModulePass * -llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) { - return new InstrProfilingLegacyPass(Options); +llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, + bool IsCS) { + return new InstrProfilingLegacyPass(Options, IsCS); } static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { @@ -415,6 +438,13 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) { LoopInfo LI(DT); DenseMap> LoopPromotionCandidates; + std::unique_ptr BFI; + if (Options.UseBFIInPromotion) { + std::unique_ptr BPI; + BPI.reset(new BranchProbabilityInfo(*F, LI, TLI)); + BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); + } + for (const auto &LoadStore : PromotionCandidates) { auto *CounterLoad = LoadStore.first; auto *CounterStore = LoadStore.second; @@ -430,7 +460,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) { // Do a post-order traversal of the loops so that counter updates can be // iteratively hoisted outside the loop nest. 
for (auto *Loop : llvm::reverse(Loops)) { - PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); Promoter.run(&TotalCountersPromoted); } } @@ -681,7 +711,6 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { // Don't do this for Darwin. compiler-rt uses linker magic. if (TT.isOSDarwin()) return false; - // Use linker script magic to get data/cnts/name start/end. if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows()) @@ -985,8 +1014,12 @@ void InstrProfiling::emitUses() { } void InstrProfiling::emitInitialization() { - // Create variable for profile name. - createProfileFileNameVar(*M, Options.InstrProfileOutput); + // Create ProfileFileName variable. Don't don't this for the + // context-sensitive instrumentation lowering: This lowering is after + // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should + // have already create the variable before LTO/ThinLTO linking. 
+ if (!IsCS) + createProfileFileNameVar(*M, Options.InstrProfileOutput); Function *RegisterF = M->getFunction(getInstrProfRegFuncsName()); if (!RegisterF) return; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index bb2e3359881c..c956f4e32e94 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -65,6 +65,7 @@ #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -132,6 +133,19 @@ STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); STATISTIC(NumOfPGOMissing, "Number of functions without profile."); STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); +STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); +STATISTIC(NumOfCSPGOSelectInsts, + "Number of select instruction instrumented in CSPGO."); +STATISTIC(NumOfCSPGOMemIntrinsics, + "Number of mem intrinsics instrumented in CSPGO."); +STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); +STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); +STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); +STATISTIC(NumOfCSPGOFunc, + "Number of functions having valid profile counts in CSPGO."); +STATISTIC(NumOfCSPGOMismatch, + "Number of functions having mismatch profile in CSPGO."); +STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); // Command line option to specify the file to read profile from. This is // mainly used for testing. 
@@ -383,7 +397,8 @@ class PGOInstrumentationGenLegacyPass : public ModulePass { public: static char ID; - PGOInstrumentationGenLegacyPass() : ModulePass(ID) { + PGOInstrumentationGenLegacyPass(bool IsCS = false) + : ModulePass(ID), IsCS(IsCS) { initializePGOInstrumentationGenLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -391,6 +406,8 @@ public: StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } private: + // Is this is context-sensitive instrumentation. + bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -403,8 +420,8 @@ public: static char ID; // Provide the profile filename as the parameter. - PGOInstrumentationUseLegacyPass(std::string Filename = "") - : ModulePass(ID), ProfileFileName(std::move(Filename)) { + PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) + : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; initializePGOInstrumentationUseLegacyPassPass( @@ -415,14 +432,38 @@ public: private: std::string ProfileFileName; + // Is this is context-sensitive instrumentation use. 
+ bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); } }; +class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { +public: + static char ID; + StringRef getPassName() const override { + return "PGOInstrumentationGenCreateVarPass"; + } + PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") + : ModulePass(ID), InstrProfileOutput(CSInstrName) { + initializePGOInstrumentationGenCreateVarLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + +private: + bool runOnModule(Module &M) override { + createProfileFileNameVar(M, InstrProfileOutput); + createIRLevelProfileFlagVar(M, true); + return false; + } + std::string InstrProfileOutput; +}; + } // end anonymous namespace char PGOInstrumentationGenLegacyPass::ID = 0; @@ -434,8 +475,8 @@ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) -ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { - return new PGOInstrumentationGenLegacyPass(); +ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { + return new PGOInstrumentationGenLegacyPass(IsCS); } char PGOInstrumentationUseLegacyPass::ID = 0; @@ -444,11 +485,25 @@ INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) -ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { - return new PGOInstrumentationUseLegacyPass(Filename.str()); +ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, + bool IsCS) { + 
return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); +} + +char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; + +INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, + "pgo-instr-gen-create-var", + "Create PGO instrumentation version variable for CSPGO.", false, + false) + +ModulePass * +llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { + return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); } namespace { @@ -496,6 +551,9 @@ template class FuncPGOInstrumentation { private: Function &F; + // Is this is context-sensitive instrumentation. + bool IsCS; + // A map that stores the Comdat group in function F. std::unordered_multimap &ComdatMembers; @@ -535,15 +593,23 @@ public: Function &Func, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), - SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { + BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), + ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), + MST(F, BPI, BFI) { // This should be done before CFG hash computation. 
SIVisitor.countSelects(Func); MIVisitor.countMemIntrinsics(Func); - NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); - ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + if (!IsCS) { + NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfPGOBB += MST.BBInfos.size(); + ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + } else { + NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfCSPGOBB += MST.BBInfos.size(); + } ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); @@ -552,13 +618,12 @@ public: renameComdatFunction(); LLVM_DEBUG(dumpInfo("after CFGMST")); - NumOfPGOBB += MST.BBInfos.size(); for (auto &E : MST.AllEdges) { if (E->Removed) continue; - NumOfPGOEdge++; + IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; if (!E->InMST) - NumOfPGOInstrument++; + IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; } if (CreateGlobalVar) @@ -597,9 +662,17 @@ void FuncPGOInstrumentation::computeCFGHash() { } } JC.update(Indexes); + + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | + //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + // Reserve bit 60-63 for other information purpose. + FunctionHash &= 0x0FFFFFFFFFFFFFFF; + if (IsCS) + NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" << " CRC = " << JC.getCRC() << ", Selects = " << SIVisitor.getNumOfSelectInsts() @@ -705,7 +778,7 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { // For a critical edge, we have to split. 
Instrument the newly // created BB. - NumOfPGOSplit++; + IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); @@ -720,12 +793,14 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { // Critical edges will be split. static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, - std::unordered_multimap &ComdatMembers) { + std::unordered_multimap &ComdatMembers, + bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); + FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, - BFI); + BFI, IsCS); unsigned NumCounters = FuncInfo.getNumCounters(); uint32_t I = 0; @@ -852,10 +927,10 @@ public: PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFIin = nullptr) + BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) : F(Func), M(Modu), BFI(BFIin), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin), - FreqAttr(FFA_Normal) {} + FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); @@ -928,6 +1003,9 @@ private: // Function hotness info derived from profile. FuncFreqAttr FreqAttr; + // Is to use the context sensitive profile. + bool IsCS; + // Find the Instrumented BB and set the value. 
void setInstrumentedCounts(const std::vector &CountFromProfile); @@ -1021,23 +1099,31 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { auto Err = IPE.get(); bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " + << FuncInfo.FuncName << ": "); if (Err == instrprof_error::unknown_function) { - NumOfPGOMissing++; + IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); } else if (Err == instrprof_error::hash_mismatch || Err == instrprof_error::malformed) { - NumOfPGOMismatch++; + IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; SkipWarning = NoPGOWarnMismatch || (NoPGOWarnMismatchComdat && (F.hasComdat() || F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); } + LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); if (SkipWarning) return; - std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); + std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + + std::string(" Hash = ") + + std::to_string(FuncInfo.FunctionHash); + Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); }); @@ -1046,7 +1132,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) ProfileRecord = std::move(Result.get()); std::vector &CountFromProfile = ProfileRecord.Counts; - NumOfPGOFunc++; + IsCS ? 
NumOfCSPGOFunc++ : NumOfPGOFunc++; LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); uint64_t ValueSum = 0; for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { @@ -1061,7 +1147,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) getBBInfo(nullptr).UnknownCountInEdge = 2; setInstrumentedCounts(CountFromProfile); +#if 0 + ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); +#else ProgramMaxCount = PGOReader->getMaximumFunctionCount(); +#endif return true; } @@ -1166,7 +1256,8 @@ void PGOUseFunc::populateCounters() { // Assign the scaled count values to the BB with multiple out edges. void PGOUseFunc::setBranchWeights() { // Generate MD_prof metadata for every branch instruction. - LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n"); + LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() + << " IsCS=" << IsCS << "\n"); for (auto &BB : F) { Instruction *TI = BB.getTerminator(); if (TI->getNumSuccessors() < 2) @@ -1174,6 +1265,7 @@ void PGOUseFunc::setBranchWeights() { if (!(isa(TI) || isa(TI) || isa(TI))) continue; + if (getBBInfo(&BB).CountValue == 0) continue; @@ -1351,24 +1443,6 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) { } } -// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime -// aware this is an ir_level profile so it can set the version flag. 
-static void createIRLevelProfileFlagVariable(Module &M) { - Type *IntTy64 = Type::getInt64Ty(M.getContext()); - uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); - auto IRLevelVersionVariable = new GlobalVariable( - M, IntTy64, true, GlobalVariable::ExternalLinkage, - Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), - INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); - IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); - Triple TT(M.getTargetTriple()); - if (!TT.supportsCOMDAT()) - IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); - else - IRLevelVersionVariable->setComdat(M.getOrInsertComdat( - StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); -} - // Collect the set of members for each Comdat in module M and store // in ComdatMembers. static void collectComdatMembers( @@ -1389,8 +1463,11 @@ static void collectComdatMembers( static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, - function_ref LookupBFI) { - createIRLevelProfileFlagVariable(M); + function_ref LookupBFI, bool IsCS) { + // For the context-sensitive instrumentation, we should have a separate pass + // (before LTO/ThinLTO linking) to create these variables. 
+ if (!IsCS) + createIRLevelProfileFlagVar(M, /* IsCS */ false); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1399,7 +1476,7 @@ static bool InstrumentAllFunctions( continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); + instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); } return true; } @@ -1414,7 +1491,7 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI); + return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, @@ -1428,7 +1505,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, return &FAM.getResult(F); }; - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) + if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1437,7 +1514,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, - function_ref LookupBFI) { + function_ref LookupBFI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. @@ -1458,6 +1535,11 @@ static bool annotateAllFunctions( StringRef("Cannot get PGOReader"))); return false; } +#if 0 + if (!PGOReader->hasCSIRLevelProfile() && IsCS) + return false; +#endif + // TODO: might need to change the warning once the clang option is finalized. if (!PGOReader->isIRLevelProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -1477,7 +1559,7 @@ static bool annotateAllFunctions( // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. 
SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1525,7 +1607,14 @@ static bool annotateAllFunctions( } } } +#if 0 + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); +#else M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); +#endif + // Set function hotness attribute from the profile. // We have to apply these attributes at the end because their presence // can affect the BranchProbabilityInfo of any callers, resulting in an @@ -1544,9 +1633,10 @@ static bool annotateAllFunctions( } PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, - std::string RemappingFilename) + std::string RemappingFilename, + bool IsCS) : ProfileFileName(std::move(Filename)), - ProfileRemappingFileName(std::move(RemappingFilename)) { + ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; if (!PGOTestProfileRemappingFile.empty()) @@ -1566,7 +1656,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, }; if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI)) + LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1583,7 +1673,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { return &this->getAnalysis(F).getBFI(); }; - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI); + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, + IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 626fb5f63634..fbf78b02551b 100644 --- 
a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -209,6 +209,10 @@ namespace options { static std::string OptRemarksFilename; static bool OptRemarksWithHotness = false; + // Context sensitive PGO options. + static std::string cs_profile_path; + static bool cs_pgo_gen = false; + static void process_plugin_option(const char *opt_) { if (opt_ == nullptr) @@ -268,7 +272,11 @@ namespace options { } else if (opt == "disable-verify") { DisableVerify = true; } else if (opt.startswith("sample-profile=")) { - sample_profile= opt.substr(strlen("sample-profile=")); + sample_profile = opt.substr(strlen("sample-profile=")); + } else if (opt == "cs-profile-generate") { + cs_pgo_gen = true; + } else if (opt.startswith("cs-profile-path=")) { + cs_profile_path = opt.substr(strlen("cs-profile-path=")); } else if (opt == "new-pass-manager") { new_pass_manager = true; } else if (opt == "debug-pass-manager") { @@ -892,6 +900,10 @@ static std::unique_ptr createLTO(IndexWriteCallback OnIndexWrite, if (!options::sample_profile.empty()) Conf.SampleProfile = options::sample_profile; + if (!options::cs_profile_path.empty()) + Conf.CSIRProfile = options::cs_profile_path; + Conf.RunCSIRInstr = options::cs_pgo_gen; + Conf.DwoDir = options::dwo_dir; // Set up optimization remarks handling. 
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index d9950815a61a..6cceb8e4cf81 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -104,6 +104,15 @@ static cl::opt SamplePGOFile("lto-sample-profile-file", cl::desc("Specify a SamplePGO profile file")); +static cl::opt + CSPGOFile("lto-cspgo-profile-file", + cl::desc("Specify a context sensitive PGO profile file")); + +static cl::opt + RunCSIRInstr("lto-cspgo-gen", + cl::desc("Run PGO context sensitive IR instrumentation"), + cl::init(false), cl::Hidden); + static cl::opt UseNewPM("use-new-pm", cl::desc("Run LTO passes using the new pass manager"), @@ -214,6 +223,8 @@ static int run(int argc, char **argv) { Conf.RemarksWithHotness = OptRemarksWithHotness; Conf.SampleProfile = SamplePGOFile; + Conf.CSIRProfile = CSPGOFile; + Conf.RunCSIRInstr = RunCSIRInstr; // Run a custom pipeline, if asked for. Conf.OptPipeline = OptPipeline;