From e5b8de2f1f4b785fe4105c74aa055e9319ebd9ac Mon Sep 17 00:00:00 2001 From: Easwaran Raman Date: Wed, 17 Jan 2018 22:24:23 +0000 Subject: [PATCH] Add a ProfileCount class to represent entry counts. Summary: The class wraps a uint64_t and an enum to represent the type of profile count (real and synthetic) with some helper methods. Reviewers: davidxl Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D41883 llvm-svn: 322771 --- llvm/include/llvm/IR/Function.h | 40 +++++++++++++++++++--- llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 2 +- llvm/lib/Analysis/ProfileSummaryInfo.cpp | 8 ++--- llvm/lib/IR/Function.cpp | 33 +++++++++++++----- llvm/lib/Transforms/IPO/PartialInlining.cpp | 6 ++-- llvm/lib/Transforms/IPO/SampleProfile.cpp | 8 +++-- .../Transforms/IPO/SyntheticCountsPropagation.cpp | 4 ++- .../Instrumentation/PGOInstrumentation.cpp | 3 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 9 ++--- llvm/lib/Transforms/Utils/InlineFunction.cpp | 28 ++++++++------- llvm/unittests/IR/MetadataTest.cpp | 17 +++++++-- 11 files changed, 114 insertions(+), 44 deletions(-) diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 2e1cfc2..9204b77 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -234,21 +234,53 @@ public: getContext(), AttributeList::FunctionIndex, Kind)); } + enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic }; + + /// Class to represent profile counts. + /// + /// This class represents both real and synthetic profile counts. 
+ class ProfileCount { + private: + uint64_t Count; + ProfileCountType PCT; + static ProfileCount Invalid; + + public: + ProfileCount() : Count(-1), PCT(PCT_Invalid) {} + ProfileCount(uint64_t Count, ProfileCountType PCT) + : Count(Count), PCT(PCT) {} + bool hasValue() const { return PCT != PCT_Invalid; } + uint64_t getCount() const { return Count; } + ProfileCountType getType() const { return PCT; } + bool isSynthetic() const { return PCT == PCT_Synthetic; } + explicit operator bool() const { return hasValue(); } + bool operator!() const { return !hasValue(); } + // Update the count retaining the same profile count type. + ProfileCount &setCount(uint64_t C) { + Count = C; + return *this; + } + static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); } + }; + /// \brief Set the entry count for this function. /// /// Entry count is the number of times this function was executed based on - /// pgo data. \p Synthetic indicates the count is synthesized by analysis and - /// not from a profile run. \p Imports points to a set of GUIDs that needs to + /// pgo data. \p Imports points to a set of GUIDs that needs to /// be imported by the function for sample PGO, to enable the same inlines as /// the profiled optimized binary. - void setEntryCount(uint64_t Count, bool Synthetic = false, + void setEntryCount(ProfileCount Count, + const DenseSet<GlobalValue::GUID> *Imports = nullptr); + + /// A convenience wrapper for setting entry count + void setEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real, const DenseSet<GlobalValue::GUID> *Imports = nullptr); /// \brief Get the entry count for this function. /// /// Entry count is the number of times the function was executed based on /// pgo data. - Optional<uint64_t> getEntryCount() const; + ProfileCount getEntryCount() const; /// Return true if the function is annotated with profile data. 
/// diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 7e32302..c9d27a2 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -567,7 +567,7 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, if (!EntryCount) return None; // Use 128 bit APInt to do the arithmetic to avoid overflow. - APInt BlockCount(128, EntryCount.getValue()); + APInt BlockCount(128, EntryCount.getCount()); APInt BlockFreq(128, Freq); APInt EntryFreq(128, getEntryFreq()); BlockCount *= BlockFreq; diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 347d093..61c9411 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -112,7 +112,7 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { // FIXME: The heuristic used below for determining hotness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isHotCount(FunctionCount.getValue()); + return FunctionCount && isHotCount(FunctionCount.getCount()); } /// Returns true if the function contains hot code. 
This can include a hot @@ -125,7 +125,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) - if (isHotCount(FunctionCount.getValue())) + if (isHotCount(FunctionCount.getCount())) return true; if (hasSampleProfile()) { @@ -154,7 +154,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) - if (!isColdCount(FunctionCount.getValue())) + if (!isColdCount(FunctionCount.getCount())) return false; if (hasSampleProfile()) { @@ -187,7 +187,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { // FIXME: The heuristic used below for determining coldness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isColdCount(FunctionCount.getValue()); + return FunctionCount && isColdCount(FunctionCount.getCount()); } /// Compute the hot and cold thresholds. diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index b413d38..24f2f3b 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -56,6 +56,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; // Explicit instantiations of SymbolTableListTraits since some of the methods // are not in the public header file... 
@@ -1320,27 +1321,43 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) { setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit)); } -void Function::setEntryCount(uint64_t Count, bool Synthetic, +void Function::setEntryCount(ProfileCount Count, const DenseSet *S) { + assert(Count.hasValue()); +#if !defined(NDEBUG) + auto PrevCount = getEntryCount(); + assert(!PrevCount.hasValue() || PrevCount.getType() == Count.getType()); +#endif MDBuilder MDB(getContext()); - setMetadata(LLVMContext::MD_prof, - MDB.createFunctionEntryCount(Count, Synthetic, S)); + setMetadata( + LLVMContext::MD_prof, + MDB.createFunctionEntryCount(Count.getCount(), Count.isSynthetic(), S)); } -Optional Function::getEntryCount() const { +void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type, + const DenseSet *Imports) { + setEntryCount(ProfileCount(Count, Type), Imports); +} + +ProfileCount Function::getEntryCount() const { MDNode *MD = getMetadata(LLVMContext::MD_prof); if (MD && MD->getOperand(0)) - if (MDString *MDS = dyn_cast(MD->getOperand(0))) + if (MDString *MDS = dyn_cast(MD->getOperand(0))) { if (MDS->getString().equals("function_entry_count")) { ConstantInt *CI = mdconst::extract(MD->getOperand(1)); uint64_t Count = CI->getValue().getZExtValue(); // A value of -1 is used for SamplePGO when there were no samples. // Treat this the same as unknown. 
if (Count == (uint64_t)-1) - return None; - return Count; + return ProfileCount::getInvalid(); + return ProfileCount(Count, PCT_Real); + } else if (MDS->getString().equals("synthetic_function_entry_count")) { + ConstantInt *CI = mdconst::extract(MD->getOperand(1)); + uint64_t Count = CI->getValue().getZExtValue(); + return ProfileCount(Count, PCT_Synthetic); } - return None; + } + return ProfileCount::getInvalid(); } DenseSet Function::getImportGUIDs() const { diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index a9cfd8d..9b1a61f 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1384,7 +1384,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (CalleeEntryCount) computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); - uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); + uint64_t CalleeEntryCountV = + (CalleeEntryCount ? CalleeEntryCount.getCount() : 0); bool AnyInline = false; for (User *User : Users) { @@ -1433,7 +1434,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (AnyInline) { Cloner.IsFunctionInlined = true; if (CalleeEntryCount) - Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); + Cloner.OrigFunc->setEntryCount( + CalleeEntryCount.setCount(CalleeEntryCountV)); auto &ORE = (*GetORE)(*Cloner.OrigFunc); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 7070234..a8d1be7 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -85,7 +85,7 @@ using namespace llvm; using namespace sampleprof; - +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "sample-profile" // Command line option to specify the file to read samples from. 
This is @@ -1467,7 +1467,9 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { // Sets the GUIDs that are inlined in the profiled binary. This is used // for ThinLink to make correct liveness analysis, and also make the IR // match the profiled binary before annotation. - F.setEntryCount(Samples->getHeadSamples() + 1, false, &InlinedGUIDs); + F.setEntryCount( + ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), + &InlinedGUIDs); // Compute dominance and loop info needed for propagation. computeDominanceAndLoopInfo(F); @@ -1587,7 +1589,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) // Initialize the entry count to -1, which will be treated conservatively // by getEntryCount as the same as unknown (None). If we have samples this // will be overwritten in emitAnnotations. - F.setEntryCount(-1); + F.setEntryCount(ProfileCount(-1, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { auto &FAM = diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp index 0276d14..f599adf 100644 --- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp +++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp @@ -41,6 +41,7 @@ using namespace llvm; using Scaled64 = ScaledNumber; +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "synthetic-counts-propagation" @@ -121,7 +122,8 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M, // Set the counts as metadata. 
for (auto Entry : Counts) - Entry.first->setEntryCount(Entry.second, true); + Entry.first->setEntryCount( + ProfileCount(Entry.second, Function::PCT_Synthetic)); return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index cb4b3a9..ab3619e 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -119,6 +119,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "pgo-instrumentation" @@ -1139,7 +1140,7 @@ void PGOUseFunc::populateCounters() { } #endif uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; - F.setEntryCount(FuncEntryCount); + F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); uint64_t FuncMaxCount = FuncEntryCount; for (auto &BB : F) { auto BI = findBBInfo(&BB); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0812596..2fc987d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -66,6 +66,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "code-extractor" @@ -1163,10 +1164,10 @@ Function *CodeExtractor::extractCodeRegion() { // Update the entry count of the function. 
if (BFI) { - Optional EntryCount = - BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (EntryCount.hasValue()) - newFunction->setEntryCount(EntryCount.getValue()); + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index a96f78e..5b4b45a 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -72,6 +72,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; static cl::opt EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), @@ -1431,29 +1432,29 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, /// Update the branch metadata for cloned call instructions. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, - const Optional &CalleeEntryCount, + const ProfileCount &CalleeEntryCount, const Instruction *TheCall, ProfileSummaryInfo *PSI, BlockFrequencyInfo *CallerBFI) { - if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) + if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || + CalleeEntryCount.getCount() < 1) return; - Optional CallSiteCount = - PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; + auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; uint64_t CallCount = std::min(CallSiteCount.hasValue() ? 
CallSiteCount.getValue() : 0, - CalleeEntryCount.getValue()); + CalleeEntryCount.getCount()); for (auto const &Entry : VMap) if (isa(Entry.first)) if (auto *CI = dyn_cast_or_null(Entry.second)) - CI->updateProfWeight(CallCount, CalleeEntryCount.getValue()); + CI->updateProfWeight(CallCount, CalleeEntryCount.getCount()); for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. if (VMap.count(&BB)) for (Instruction &I : BB) if (CallInst *CI = dyn_cast(&I)) - CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount, - CalleeEntryCount.getValue()); + CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount, + CalleeEntryCount.getCount()); } /// Update the entry count of callee after inlining. @@ -1467,18 +1468,19 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, // callsite is M, the new callee count is set to N - M. M is estimated from // the caller's entry count, its entry block frequency and the block frequency // of the callsite. - Optional CalleeCount = Callee->getEntryCount(); + auto CalleeCount = Callee->getEntryCount(); if (!CalleeCount.hasValue() || !PSI) return; - Optional CallCount = PSI->getProfileCount(CallInst, CallerBFI); + auto CallCount = PSI->getProfileCount(CallInst, CallerBFI); if (!CallCount.hasValue()) return; // Since CallSiteCount is an estimate, it could exceed the original callee // count and has to be set to 0. 
- if (CallCount.getValue() > CalleeCount.getValue()) - Callee->setEntryCount(0); + if (CallCount.getValue() > CalleeCount.getCount()) + CalleeCount.setCount(0); else - Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue()); + CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue()); + Callee->setEntryCount(CalleeCount); } /// This function inlines the called function into the basic block of the diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index 672de55..51ca840 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -2436,9 +2436,20 @@ TEST_F(FunctionAttachmentTest, Verifier) { TEST_F(FunctionAttachmentTest, EntryCount) { Function *F = getFunction("foo"); EXPECT_FALSE(F->getEntryCount().hasValue()); - F->setEntryCount(12304); - EXPECT_TRUE(F->getEntryCount().hasValue()); - EXPECT_EQ(12304u, *F->getEntryCount()); + F->setEntryCount(12304, Function::PCT_Real); + auto Count = F->getEntryCount(); + EXPECT_TRUE(Count.hasValue()); + EXPECT_EQ(12304u, Count.getCount()); + EXPECT_EQ(Function::PCT_Real, Count.getType()); + + // Repeat the same for synthetic counts. + F = getFunction("bar"); + EXPECT_FALSE(F->getEntryCount().hasValue()); + F->setEntryCount(123, Function::PCT_Synthetic); + Count = F->getEntryCount(); + EXPECT_TRUE(Count.hasValue()); + EXPECT_EQ(123u, Count.getCount()); + EXPECT_EQ(Function::PCT_Synthetic, Count.getType()); } TEST_F(FunctionAttachmentTest, SubprogramAttachment) { -- 2.7.4