From 6c27c61d32fd2951a290c6d4363bd495f6feae96 Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Wed, 8 Apr 2020 16:06:25 -0700 Subject: [PATCH] [PGO] Improve the working set size heuristics under the partial sample PGO. Summary: The working set size heuristics (ProfileSummaryInfo::hasHugeWorkingSetSize) under the partial sample PGO may not be accurate because the profile is partial and the number of hot profile counters in the ProfileSummary may not reflect the actual working set size of the program being compiled. To improve this, the (approximated) ratio of the number of profile counters of the program being compiled to the number of profile counters in the partial sample profile is computed (which is called the partial profile ratio) and the working set size of the profile is scaled by this ratio to reflect the working set size of the program being compiled and used for the working set size heuristics. The partial profile ratio is approximated based on the number of the basic blocks in the program and the NumCounts field in the ProfileSummary and computed through the thin LTO indexing. This means that there is the limitation that the scaled working set size is available to the thin LTO post link passes only. 
Reviewers: davidxl Subscribers: mgorny, eraman, hiraditya, steven_wu, dexonsmith, arphaman, dang, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D79831 --- llvm/include/llvm/IR/Module.h | 5 ++ llvm/include/llvm/IR/ProfileSummary.h | 3 +- llvm/lib/Analysis/ProfileSummaryInfo.cpp | 38 ++++++++- llvm/lib/IR/Module.cpp | 21 +++++ llvm/lib/LTO/LTOBackend.cpp | 4 + llvm/lib/Transforms/IPO/FunctionImport.cpp | 6 ++ llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp | 97 +++++++++++++++++----- llvm/unittests/IR/ModuleTest.cpp | 37 +++++++++ 8 files changed, 185 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index ead0030..36d5866 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -46,6 +46,7 @@ class FunctionType; class GVMaterializer; class LLVMContext; class MemoryBuffer; +class ModuleSummaryIndex; class Pass; class RandomNumberGenerator; template class SmallPtrSetImpl; @@ -882,6 +883,10 @@ public: /// Take ownership of the given memory buffer. void setOwnedMemoryBuffer(std::unique_ptr MB); + + /// Set the partial sample profile ratio in the profile summary module flag, + /// if applicable. + void setPartialSampleProfileRatio(const ModuleSummaryIndex &Index); }; /// Given "llvm.used" or "llvm.compiler.used" as a global name, collect diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h index 00af0c5..889568e 100644 --- a/llvm/include/llvm/IR/ProfileSummary.h +++ b/llvm/include/llvm/IR/ProfileSummary.h @@ -59,7 +59,8 @@ private: bool Partial = false; /// This approximately represents the ratio of the number of profile counters /// of the program being built to the number of profile counters in the - /// partial sample profile. When 'Partial' is false, it is undefined. + /// partial sample profile. When 'Partial' is false, it is undefined. This is + /// currently only available under thin LTO mode. 
double PartialProfileRatio = 0; /// Return detailed summary as metadata. Metadata *getDetailedSummaryMD(LLVMContext &Context); diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 3360fd4..e3a76a6 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -70,6 +70,23 @@ static cl::opt PartialProfile( "partial-profile", cl::Hidden, cl::init(false), cl::desc("Specify the current profile is used as a partial profile.")); +cl::opt ScalePartialSampleProfileWorkingSetSize( + "scale-partial-sample-profile-working-set-size", cl::Hidden, + cl::init(false), + cl::desc( + "If true, scale the working set size of the partial sample profile " + "by the partial profile ratio to reflect the size of the program " + "being compiled.")); + +static cl::opt PartialSampleProfileWorkingSetSizeScaleFactor( + "partial-sample-profile-working-set-size-scale-factor", cl::Hidden, + cl::init(0.008), + cl::desc("The scale factor used to scale the working set size of the " + "partial sample profile along with the partial profile ratio. " + "This includes the factor of the profile counter per block " + "and the factor to scale the working set size to use the same " + "shared thresholds as PGO.")); + // Find the summary entry for a desired percentile of counts. 
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile) { @@ -280,10 +297,23 @@ void ProfileSummaryInfo::computeThresholds() { ColdCountThreshold = ProfileSummaryColdCount; assert(ColdCountThreshold <= HotCountThreshold && "Cold count threshold cannot exceed hot count threshold!"); - HasHugeWorkingSetSize = - HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; - HasLargeWorkingSetSize = - HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) { + HasHugeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + } else { + // Scale the working set size of the partial sample profile to reflect the + // size of the program being compiled. + double PartialProfileRatio = Summary->getPartialProfileRatio(); + uint64_t ScaledHotEntryNumCounts = + static_cast(HotEntry.NumCounts * PartialProfileRatio * + PartialSampleProfileWorkingSetSizeScaleFactor); + HasHugeWorkingSetSize = + ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; + } } Optional diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 1416cdc..3ea181a 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" @@ -673,3 +674,23 @@ GlobalVariable *llvm::collectUsedGlobalVariables( } return GV; } + +void Module::setPartialSampleProfileRatio(const ModuleSummaryIndex &Index) { + if (auto *SummaryMD = getProfileSummary(/*IsCS*/ false)) { + std::unique_ptr ProfileSummary( 
+ ProfileSummary::getFromMD(SummaryMD)); + if (ProfileSummary) { + if (ProfileSummary->getKind() != ProfileSummary::PSK_Sample || + !ProfileSummary->isPartialProfile()) + return; + uint64_t BlockCount = Index.getBlockCount(); + uint32_t NumCounts = ProfileSummary->getNumCounts(); + if (!NumCounts) + return; + double Ratio = (double)BlockCount / NumCounts; + ProfileSummary->setPartialProfileRatio(Ratio); + setProfileSummary(ProfileSummary->getMD(getContext()), + ProfileSummary::PSK_Sample); + } + } +} diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 22019e4..79c5281 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -541,6 +541,10 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, return DiagFileOrErr.takeError(); auto DiagnosticOutputFile = std::move(*DiagFileOrErr); + // Set the partial sample profile ratio in the profile summary module flag of + // the module, if applicable. + Mod.setPartialSampleProfileRatio(CombinedIndex); + if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index a73ba84..468bf19 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1232,6 +1232,12 @@ Expected FunctionImporter::importFunctions( // have loaded all the required metadata! UpgradeDebugInfo(*SrcModule); + // Set the partial sample profile ratio in the profile summary module flag + // of the imported source module, if applicable, so that the profile summary + // module flag will match with that of the destination module when it's + // imported. + SrcModule->setPartialSampleProfileRatio(Index); + // Link in the specified functions. 
if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations, &GlobalsToImport)) diff --git a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp index ae60c41..cbd2236 100644 --- a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp +++ b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp @@ -23,6 +23,8 @@ #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" +extern llvm::cl::opt ScalePartialSampleProfileWorkingSetSize; + namespace llvm { namespace { @@ -42,7 +44,12 @@ protected: BPI.reset(new BranchProbabilityInfo(F, *LI)); return BlockFrequencyInfo(F, *BPI, *LI); } - std::unique_ptr makeLLVMModule(const char *ProfKind = nullptr) { + std::unique_ptr makeLLVMModule(const char *ProfKind = nullptr, + uint64_t NumCounts = 3, + uint64_t IsPartialProfile = 0, + double PartialProfileRatio = 0.0, + uint64_t HotNumCounts = 3, + uint64_t ColdNumCounts = 10) { const char *ModuleString = "define i32 @g(i32 %x) !prof !21 {{\n" " ret i32 0\n" @@ -83,27 +90,32 @@ protected: "!22 = !{{!\"function_entry_count\", i64 100}\n" "!23 = !{{!\"branch_weights\", i32 64, i32 4}\n" "{0}"; - const char *SummaryString = "!llvm.module.flags = !{{!1}" - "!1 = !{{i32 1, !\"ProfileSummary\", !2}" - "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10}" - "!3 = !{{!\"ProfileFormat\", !\"{0}\"}" - "!4 = !{{!\"TotalCount\", i64 10000}" - "!5 = !{{!\"MaxCount\", i64 10}" - "!6 = !{{!\"MaxInternalCount\", i64 1}" - "!7 = !{{!\"MaxFunctionCount\", i64 1000}" - "!8 = !{{!\"NumCounts\", i64 3}" - "!9 = !{{!\"NumFunctions\", i64 3}" - "!10 = !{{!\"DetailedSummary\", !11}" - "!11 = !{{!12, !13, !14}" - "!12 = !{{i32 10000, i64 1000, i32 1}" - "!13 = !{{i32 999000, i64 300, i32 3}" - "!14 = !{{i32 999999, i64 5, i32 10}"; + const char *SummaryString = + "!llvm.module.flags = !{{!1}\n" + "!1 = !{{i32 1, !\"ProfileSummary\", !2}\n" + "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}\n" + "!3 = !{{!\"ProfileFormat\", !\"{0}\"}\n" + "!4 = 
!{{!\"TotalCount\", i64 10000}\n" + "!5 = !{{!\"MaxCount\", i64 10}\n" + "!6 = !{{!\"MaxInternalCount\", i64 1}\n" + "!7 = !{{!\"MaxFunctionCount\", i64 1000}\n" + "!8 = !{{!\"NumCounts\", i64 {1}}\n" + "!9 = !{{!\"NumFunctions\", i64 3}\n" + "!10 = !{{!\"IsPartialProfile\", i64 {2}}\n" + "!11 = !{{!\"PartialProfileRatio\", double {3}}\n" + "!12 = !{{!\"DetailedSummary\", !13}\n" + "!13 = !{{!14, !15, !16}\n" + "!14 = !{{i32 10000, i64 1000, i32 1}\n" + "!15 = !{{i32 990000, i64 300, i32 {4}}\n" + "!16 = !{{i32 999999, i64 5, i32 {5}}\n"; SMDiagnostic Err; - if (ProfKind) - return parseAssemblyString( - formatv(ModuleString, formatv(SummaryString, ProfKind).str()).str(), - Err, C); - else + if (ProfKind) { + auto Summary = + formatv(SummaryString, ProfKind, NumCounts, IsPartialProfile, + PartialProfileRatio, HotNumCounts, ColdNumCounts) + .str(); + return parseAssemblyString(formatv(ModuleString, Summary).str(), Err, C); + } else return parseAssemblyString(formatv(ModuleString, "").str(), Err, C); } }; @@ -280,6 +292,7 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) { ProfileSummaryInfo PSI = buildPSI(M.get()); EXPECT_TRUE(PSI.hasProfileSummary()); EXPECT_TRUE(PSI.hasSampleProfile()); + EXPECT_FALSE(PSI.hasPartialSampleProfile()); BasicBlock &BB0 = F->getEntryBlock(); BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0); @@ -373,5 +386,47 @@ TEST_F(ProfileSummaryInfoTest, SampleProfNoFuncEntryCount) { EXPECT_FALSE(PSI.isFunctionColdInCallGraphNthPercentile(990000, F, BFI)); } +TEST_F(ProfileSummaryInfoTest, PartialSampleProfWorkingSetSize) { + ScalePartialSampleProfileWorkingSetSize.setValue(true); + + // With PartialProfileRatio unset (zero.) 
+ auto M1 = makeLLVMModule("SampleProfile", /*NumCounts*/ 3, + /*IsPartialProfile*/ 1, + /*PartialProfileRatio*/ 0.0, + /*HotNumCounts*/ 3, /*ColdNumCounts*/ 10); + ProfileSummaryInfo PSI1 = buildPSI(M1.get()); + EXPECT_TRUE(PSI1.hasProfileSummary()); + EXPECT_TRUE(PSI1.hasSampleProfile()); + EXPECT_TRUE(PSI1.hasPartialSampleProfile()); + EXPECT_FALSE(PSI1.hasHugeWorkingSetSize()); + EXPECT_FALSE(PSI1.hasLargeWorkingSetSize()); + + // With PartialProfileRatio set (non-zero) and a small working set size. + auto M2 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235, + /*IsPartialProfile*/ 1, + /*PartialProfileRatio*/ 0.00000012, + /*HotNumCounts*/ 3102082, + /*ColdNumCounts*/ 18306149); + ProfileSummaryInfo PSI2 = buildPSI(M2.get()); + EXPECT_TRUE(PSI2.hasProfileSummary()); + EXPECT_TRUE(PSI2.hasSampleProfile()); + EXPECT_TRUE(PSI2.hasPartialSampleProfile()); + EXPECT_FALSE(PSI2.hasHugeWorkingSetSize()); + EXPECT_FALSE(PSI2.hasLargeWorkingSetSize()); + + // With PartialProfileRatio is set (non-zero) and a large working set size. 
+ auto M3 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235, + /*IsPartialProfile*/ 1, + /*PartialProfileRatio*/ 0.9, + /*HotNumCounts*/ 3102082, + /*ColdNumCounts*/ 18306149); + ProfileSummaryInfo PSI3 = buildPSI(M3.get()); + EXPECT_TRUE(PSI3.hasProfileSummary()); + EXPECT_TRUE(PSI3.hasSampleProfile()); + EXPECT_TRUE(PSI3.hasPartialSampleProfile()); + EXPECT_TRUE(PSI3.hasHugeWorkingSetSize()); + EXPECT_TRUE(PSI3.hasLargeWorkingSetSize()); +} + } // end anonymous namespace } // end namespace llvm diff --git a/llvm/unittests/IR/ModuleTest.cpp b/llvm/unittests/IR/ModuleTest.cpp index 7b34d5d..67338f7 100644 --- a/llvm/unittests/IR/ModuleTest.cpp +++ b/llvm/unittests/IR/ModuleTest.cpp @@ -9,6 +9,7 @@ #include "llvm/IR/Module.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Pass.h" #include "llvm/Support/RandomNumberGenerator.h" #include "gtest/gtest.h" @@ -121,4 +122,40 @@ TEST(ModuleTest, setProfileSummary) { delete PS; } +TEST(ModuleTest, setPartialSampleProfileRatio) { + const char *IRString = R"IR( + !llvm.module.flags = !{!0} + + !0 = !{i32 1, !"ProfileSummary", !1} + !1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11} + !2 = !{!"ProfileFormat", !"SampleProfile"} + !3 = !{!"TotalCount", i64 10000} + !4 = !{!"MaxCount", i64 10} + !5 = !{!"MaxInternalCount", i64 1} + !6 = !{!"MaxFunctionCount", i64 1000} + !7 = !{!"NumCounts", i64 200} + !8 = !{!"NumFunctions", i64 3} + !9 = !{!"IsPartialProfile", i64 1} + !10 = !{!"PartialProfileRatio", double 0.0} + !11 = !{!"DetailedSummary", !12} + !12 = !{!13, !14, !15} + !13 = !{i32 10000, i64 1000, i32 1} + !14 = !{i32 990000, i64 300, i32 10} + !15 = !{i32 999999, i64 5, i32 100} + )IR"; + + SMDiagnostic Err; + LLVMContext Context; + std::unique_ptr M = parseAssemblyString(IRString, Err, Context); + ModuleSummaryIndex Index(/*HaveGVs*/ false); + const unsigned BlockCount = 100; + const unsigned NumCounts = 200; + 
Index.setBlockCount(BlockCount); + M->setPartialSampleProfileRatio(Index); + double Ratio = (double)BlockCount / NumCounts; + std::unique_ptr ProfileSummary( + ProfileSummary::getFromMD(M->getProfileSummary(/*IsCS*/ false))); + EXPECT_EQ(Ratio, ProfileSummary->getPartialProfileRatio()); +} + } // end namespace -- 2.7.4