From 21b1ad0340a7ba69c605ea1c218adb567b5190ae Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Tue, 5 Jan 2021 23:24:43 -0800 Subject: [PATCH] [SampleFDO] Add the support to split the function profiles with context into separate sections. For ThinLTO, all the function profiles without context have been annotated to outline functions if possible in prelink phase. In postlink phase, profile annotation is only meaningful for function profiles with context. If the profile is large, it is better to split the profile into two parts, one with context and one without, so the profile reading in postlink phase only has to read the part with context. To have the profile splitting, we extend the ExtBinary format to support different section arrangements. It will be flexible to add other section layouts in the future without the need to create a new class inheriting from the ExtBinary class. Differential Revision: https://reviews.llvm.org/D94435 --- llvm/include/llvm/ProfileData/SampleProf.h | 4 +- llvm/include/llvm/ProfileData/SampleProfReader.h | 10 +++ llvm/include/llvm/ProfileData/SampleProfWriter.h | 95 ++++++++++++++++----- llvm/lib/ProfileData/SampleProfReader.cpp | 7 ++ llvm/lib/ProfileData/SampleProfWriter.cpp | 58 ++++++++++++- llvm/lib/Transforms/IPO/SampleProfile.cpp | 6 +- .../SampleProfile/Inputs/ctxsplit.extbinary.afdo | Bin 0 -> 467 bytes llvm/test/Transforms/SampleProfile/ctxsplit.ll | 59 +++++++++++++ 8 files changed, 215 insertions(+), 24 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/ctxsplit.extbinary.afdo create mode 100644 llvm/test/Transforms/SampleProfile/ctxsplit.ll diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index c423466..c45ace9 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -164,7 +164,9 @@ struct SecHdrTableEntry { // will be saved in the higher 32 bits. 
enum class SecCommonFlags : uint32_t { SecFlagInValid = 0, - SecFlagCompress = (1 << 0) + SecFlagCompress = (1 << 0), + // Indicate the section contains only profile without context. + SecFlagFlat = (1 << 1) }; // Section specific flags are defined here. diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 92fe825..3f52a2f 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -451,6 +451,10 @@ public: /// Return whether names in the profile are all MD5 numbers. virtual bool useMD5() { return false; } + /// Don't read profile without context if the flag is set. This is only meaningful + /// for ExtBinary format. + virtual void setSkipFlatProf(bool Skip) {} + SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); } protected: @@ -666,6 +670,10 @@ protected: /// the lifetime of MD5StringBuf is not shorter than that of NameTable. std::unique_ptr> MD5StringBuf; + /// If SkipFlatProf is true, skip the sections with + /// SecFlagFlat flag. 
+ bool SkipFlatProf = false; + public: SampleProfileReaderExtBinaryBase(std::unique_ptr B, LLVMContext &C, SampleProfileFormat Format) @@ -689,6 +697,8 @@ public: virtual std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); }; + + virtual void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; } }; class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index fc568f0..e72963a 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -15,6 +15,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/ErrorOr.h" @@ -28,6 +29,15 @@ namespace llvm { namespace sampleprof { +enum SectionLayout { + DefaultLayout, + // The layout splits profile with context information from profile without + // context information. When Thinlto is enabled, ThinLTO postlink phase only + // has to load profile with context information and can skip the other part. + CtxSplitLayout, + NumOfLayout, +}; + /// Sample-based profile writer. Base class. class SampleProfileWriter { public: @@ -60,6 +70,7 @@ public: virtual void setToCompressAllSections() {} virtual void setUseMD5() {} virtual void setPartialProfile() {} + virtual void resetSecLayout(SectionLayout SL) {} protected: SampleProfileWriter(std::unique_ptr &OS) @@ -144,6 +155,36 @@ class SampleProfileWriterRawBinary : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; }; +const std::array, NumOfLayout> + ExtBinaryHdrLayoutTable = { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. 
+ // This is because sample reader follows the order in SectionHdrLayout + // to read each section. To read function profiles on demand, sample + // reader need to get the offset of each function profile first. + // + // DefaultLayout + SmallVector({{SecProfSummary}, + {SecNameTable}, + {SecFuncOffsetTable}, + {SecLBRProfile}, + {SecProfileSymbolList}, + {SecFuncMetadata}}), + // CtxSplitLayout + SmallVector({{SecProfSummary}, + {SecNameTable}, + // profile with context + // for next two sections + {SecFuncOffsetTable}, + {SecLBRProfile}, + // profile without context + // for next two sections + {SecFuncOffsetTable}, + {SecLBRProfile}, + {SecProfileSymbolList}, + {SecFuncMetadata}}), +}; + class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: @@ -174,6 +215,19 @@ public: ProfSymList = PSL; }; + virtual void resetSecLayout(SectionLayout SL) override { + verifySecLayout(SL); +#ifndef NDEBUG + // Make sure resetSecLayout is called before any flag setting. + for (auto &Entry : SectionHdrLayout) { + assert(Entry.Flags == 0 && + "resetSecLayout has to be called before any flag setting"); + } +#endif + SecLayout = SL; + SectionHdrLayout = ExtBinaryHdrLayoutTable[SL]; + } + protected: uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx); std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx, @@ -185,11 +239,16 @@ protected: addSecFlag(Entry, Flag); } } + template + void addSectionFlag(uint32_t SectionIdx, SecFlagType Flag) { + addSecFlag(SectionHdrLayout[SectionIdx], Flag); + } // placeholder for subclasses to dispatch their own section writers. virtual std::error_code writeCustomSection(SecType Type) = 0; + // Verify the SecLayout is supported by the format. + virtual void verifySecLayout(SectionLayout SL) = 0; - virtual void initSectionHdrLayout() = 0; // specify the order to write sections. 
virtual std::error_code writeSections(const StringMap &ProfileMap) = 0; @@ -211,11 +270,13 @@ protected: std::error_code writeFuncOffsetTable(); std::error_code writeProfileSymbolListSection(); + SectionLayout SecLayout = DefaultLayout; // Specifiy the order of sections in section header table. Note // the order of sections in SecHdrTable may be different that the // order in SectionHdrLayout. sample Reader will follow the order // in SectionHdrLayout to read each section. - SmallVector SectionHdrLayout; + SmallVector SectionHdrLayout = + ExtBinaryHdrLayoutTable[DefaultLayout]; // Save the start of SecLBRProfile so we can compute the offset to the // start of SecLBRProfile for each Function's Profile and will keep it @@ -261,33 +322,25 @@ private: class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { public: SampleProfileWriterExtBinary(std::unique_ptr &OS) - : SampleProfileWriterExtBinaryBase(OS) { - initSectionHdrLayout(); - } + : SampleProfileWriterExtBinaryBase(OS) {} private: - virtual void initSectionHdrLayout() override { - // Note that SecFuncOffsetTable section is written after SecLBRProfile - // in the profile, but is put before SecLBRProfile in SectionHdrLayout. - // - // This is because sample reader follows the order of SectionHdrLayout to - // read each section, to read function profiles on demand sample reader - // need to get the offset of each function profile first. - // - // SecFuncOffsetTable section is written after SecLBRProfile in the - // profile because FuncOffsetTable needs to be populated while section - // SecLBRProfile is written. 
- SectionHdrLayout = { - {SecProfSummary, 0, 0, 0, 0}, {SecNameTable, 0, 0, 0, 0}, - {SecFuncOffsetTable, 0, 0, 0, 0}, {SecLBRProfile, 0, 0, 0, 0}, - {SecProfileSymbolList, 0, 0, 0, 0}, {SecFuncMetadata, 0, 0, 0, 0}}; - }; + std::error_code + writeDefaultLayout(const StringMap &ProfileMap); + std::error_code + writeCtxSplitLayout(const StringMap &ProfileMap); + virtual std::error_code writeSections(const StringMap &ProfileMap) override; virtual std::error_code writeCustomSection(SecType Type) override { return sampleprof_error::success; }; + + virtual void verifySecLayout(SectionLayout SL) override { + assert((SL == DefaultLayout || SL == CtxSplitLayout) && + "Unsupported layout"); + } }; // CompactBinary is a compact format of binary profile which both reduces diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index a8ffb37..e8ac06d 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -740,6 +740,10 @@ std::error_code SampleProfileReaderExtBinaryBase::readImpl() { if (!Entry.Size) continue; + // Skip sections without context when SkipFlatProf is true. 
+ if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) + continue; + const uint8_t *SecStart = BufStart + Entry.Offset; uint64_t SecSize = Entry.Size; @@ -986,6 +990,9 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { else Flags.append("{"); + if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) + Flags.append("flat,"); + switch (Entry.Type) { case SecNameTable: if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 60cfe50..71dba62 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Compression.h" @@ -264,7 +265,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( return sampleprof_error::success; } -std::error_code SampleProfileWriterExtBinary::writeSections( +std::error_code SampleProfileWriterExtBinary::writeDefaultLayout( const StringMap &ProfileMap) { // The const indices passed to writeOneSection below are specifying the // positions of the sections in SectionHdrLayout. 
Look at @@ -285,6 +286,61 @@ std::error_code SampleProfileWriterExtBinary::writeSections( return sampleprof_error::success; } +static void +splitProfileMapToTwo(const StringMap &ProfileMap, + StringMap &ContextProfileMap, + StringMap &NoContextProfileMap) { + for (const auto &I : ProfileMap) { + if (I.second.getCallsiteSamples().size()) + ContextProfileMap.insert({I.first(), I.second}); + else + NoContextProfileMap.insert({I.first(), I.second}); + } +} + +std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout( + const StringMap &ProfileMap) { + StringMap ContextProfileMap, NoContextProfileMap; + splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap); + + if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecLBRProfile, 3, ContextProfileMap)) + return EC; + if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ContextProfileMap)) + return EC; + // Mark the section to have no context. Note section flag needs to be set + // before writing the section. + addSectionFlag(5, SecCommonFlags::SecFlagFlat); + if (auto EC = writeOneSection(SecLBRProfile, 5, NoContextProfileMap)) + return EC; + // Mark the section to have no context. Note section flag needs to be set + // before writing the section. 
+ addSectionFlag(4, SecCommonFlags::SecFlagFlat); + if (auto EC = writeOneSection(SecFuncOffsetTable, 4, NoContextProfileMap)) + return EC; + if (auto EC = writeOneSection(SecProfileSymbolList, 6, ProfileMap)) + return EC; + if (auto EC = writeOneSection(SecFuncMetadata, 7, ProfileMap)) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinary::writeSections( + const StringMap &ProfileMap) { + std::error_code EC; + if (SecLayout == DefaultLayout) + EC = writeDefaultLayout(ProfileMap); + else if (SecLayout == CtxSplitLayout) + EC = writeCtxSplitLayout(ProfileMap); + else + llvm_unreachable("Unsupported layout"); + return EC; +} + std::error_code SampleProfileWriterCompactBinary::write( const StringMap &ProfileMap) { if (std::error_code EC = SampleProfileWriter::write(ProfileMap)) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 30da7b6..ef1ec9c 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1946,6 +1946,7 @@ bool SampleProfileLoader::doInitialization(Module &M, return false; } Reader = std::move(ReaderOrErr.get()); + Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink); Reader->collectFuncsFrom(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); PSL = Reader->getProfileSymbolList(); @@ -2111,7 +2112,10 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) initialEntryCount = -1; } - F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); + // Initialize entry count when the function has no existing entry + // count value. 
+ if (!F.getEntryCount().hasValue()) + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { auto &FAM = diff --git a/llvm/test/Transforms/SampleProfile/Inputs/ctxsplit.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/ctxsplit.extbinary.afdo new file mode 100644 index 0000000000000000000000000000000000000000..8c27e21db1b46175228d5da1ee382d7ec80a0b82 GIT binary patch literal 467 zcmZp9a$)0_lT%g%r*kks03(!!Q9@9GOsD`8Oae$V193G}UJ@$Kf+P=O?t#jK)PjHl zm_!g$pz<0}A((!cJr{83hsp1U$_qmE!|Z|Ca2YBOfl z8?_l1^s)iT1G9MgwU{0(7Xp$ETP5aevMx9z2P6-iR{@d_?q~tYhF1n4@$Y6p%^&_+ z0?7^AY=GpAKOpi>vn`O^)&e5;gGuJ-G;M`Dar3L!?3K9fbLdA*&YfKkL~rGHykKYA z{Fs-CiD9EQFogTH7#Nuu8J_3@JvLvHfe}P7fd~dhCWam2j34xv7=biU3`j6AGBN-F DM1xKv literal 0 HcmV?d00001 diff --git a/llvm/test/Transforms/SampleProfile/ctxsplit.ll b/llvm/test/Transforms/SampleProfile/ctxsplit.ll new file mode 100644 index 0000000..b97f737 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/ctxsplit.ll @@ -0,0 +1,59 @@ +; Check the nonflattened part of the ctxsplit profile will be read in thinlto +; postlink phase while flattened part of the ctxsplit profile will not be read. +; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=POSTLINK +; +; Check both the flattened and nonflattened parts of the ctxsplit profile will +; be read in thinlto prelink phase. +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=PRELINK +; +; Check both the flattened and nonflattened parts of the ctxsplit profile will +; be read in non-thinlto mode. 
+; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/ctxsplit.extbinary.afdo -S | FileCheck %s --check-prefix=NOTHINLTO + +; POSTLINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; POSTLINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; POSTLINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; POSTLINK: ![[ENTRY2]] = !{!"function_entry_count", i64 -1} +; PRELINK: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; PRELINK: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; PRELINK: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; PRELINK: ![[ENTRY2]] = !{!"function_entry_count", i64 3001} +; NOTHINLTO: define dso_local i32 @goo() {{.*}} !prof ![[ENTRY1:[0-9]+]] { +; NOTHINLTO: define dso_local i32 @foo() {{.*}} !prof ![[ENTRY2:[0-9]+]] { +; NOTHINLTO: ![[ENTRY1]] = !{!"function_entry_count", i64 1001} +; NOTHINLTO: ![[ENTRY2]] = !{!"function_entry_count", i64 3001} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @goo() #0 !dbg !10 { +entry: + ret i32 -1, !dbg !11 +} + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @foo() #0 !dbg !7 { +entry: + ret i32 -1, !dbg !9 +} + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 8.0.0 (trunk 345241)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 
1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 3, scope: !7) +!10 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !0, retainedNodes: !2) +!11 = !DILocation(line: 10, column: 3, scope: !10) + -- 2.7.4