From: Rong Xu Date: Thu, 27 May 2021 18:34:22 +0000 (-0700) Subject: [SampleFDO] New hierarchical discriminator for FS SampleFDO (ProfileData part) X-Git-Tag: llvmorg-14-init~5042 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6745ffe4fa1c35778f88cffd671b7a7a936fb4d9;p=platform%2Fupstream%2Fllvm.git [SampleFDO] New hierarchical discriminator for FS SampleFDO (ProfileData part) This patch was split from https://reviews.llvm.org/D102246 [SampleFDO] New hierarchical discriminator for Flow Sensitive SampleFDO This is mainly for the ProfileData part of the change. It will load an FS profile when such a profile is detected. For an extbinary format profile, the create_llvm_prof tool will add a flag to the profile summary section. For profiles in other formats, users need to pass an internal option (-profile-isfs) to tell the compiler that the profile uses FS discriminators. This patch also simplifies the bit API used by FS discriminators. Differential Revision: https://reviews.llvm.org/D103041 --- diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h index cda8c8b..6137411 100644 --- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h +++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h @@ -41,6 +41,7 @@ namespace llvm { +using namespace sampleprof; class MIRAddFSDiscriminators : public MachineFunctionPass { MachineFunction *MF; unsigned LowBit; @@ -48,10 +49,11 @@ class MIRAddFSDiscriminators : public MachineFunctionPass { public: static char ID; - /// FS bits that will be used in this pass (numbers are 0 based and - /// inclusive). - MIRAddFSDiscriminators(unsigned LowBit = 0, unsigned HighBit = 0) - : MachineFunctionPass(ID), LowBit(LowBit), HighBit(HighBit) { + /// PassNum is the sequence number this pass is called, start from 1. 
+ MIRAddFSDiscriminators(FSDiscriminatorPass P = FSDiscriminatorPass::Pass1) + : MachineFunctionPass(ID) { + LowBit = getFSPassBitBegin(P); + HighBit = getFSPassBitEnd(P); assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 61b22d0..d0fe1a2 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_PASSES_H #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Discriminator.h" #include #include @@ -490,9 +491,10 @@ namespace llvm { /// Create IR Type Promotion pass. \see TypePromotion.cpp FunctionPass *createTypePromotionPass(); - /// Add Flow Sensitive Discriminators. - FunctionPass *createMIRAddFSDiscriminatorsPass(unsigned LowBit, - unsigned HighBit); + /// Add Flow Sensitive Discriminators. PassNum specifies the + /// sequence number of this pass (starting from 1). + FunctionPass * + createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); /// Creates MIR Debugify pass. \see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index c9d4333..031f5c3 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -1741,7 +1741,7 @@ public: } /// Return the bits used for base discriminators. - static unsigned getBaseDiscriminatorBits() { return BASE_DIS_BIT_END; } + static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); } /// Returns the base discriminator for a given encoded discriminator \p D. 
static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) { diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 4fda543..cbdf5d5 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -190,7 +190,10 @@ enum class SecProfSummaryFlags : uint32_t { SecFlagPartial = (1 << 0), /// SecFlagContext means this is context-sensitive profile for /// CSSPGO - SecFlagFullContext = (1 << 1) + SecFlagFullContext = (1 << 1), + /// SecFlagFSDiscriminator means this profile uses flow-sensitive + /// discriminators. + SecFlagFSDiscriminator = (1 << 2) }; enum class SecFuncMetadataFlags : uint32_t { @@ -891,6 +894,9 @@ public: /// Whether the profile contains any ".__uniq." suffix in a name. static bool HasUniqSuffix; + /// If this profile uses flow sensitive discriminators. + static bool ProfileIsFS; + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for /// all the function symbols defined or declared in current module. DenseMap *GUIDToFuncNameMap = nullptr; diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 8203a1b..e6c70e5 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -236,6 +236,7 @@ #include "llvm/ProfileData/GCOV.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Discriminator.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SymbolRemappingReader.h" @@ -350,6 +351,26 @@ public: /// Read and validate the file header. virtual std::error_code readHeader() = 0; + /// Set the bits for FS discriminators. Parameter Pass specify the sequence + /// number, Pass == i is for the i-th round of adding FS discriminators. + /// Pass == 0 is for using base discriminators. 
+ void setDiscriminatorMaskedBitFrom(FSDiscriminatorPass P) { + MaskedBitFrom = getFSPassBitEnd(P); + } + /// Set the bits for using base discriminators. + void setBaseDiscriminatorMask() { + setDiscriminatorMaskedBitFrom(FSDiscriminatorPass::Base); + } + + /// Get the bitmask the discriminators: For FS profiles, return the bit + /// mask for this pass. For non FS profiles, return (unsigned) -1. + uint32_t getDiscriminatorMask() const { + if (!ProfileIsFS) + return 0xFFFFFFFF; + assert((MaskedBitFrom != 0) && "MaskedBitFrom is not set properly"); + return getN1Bits(MaskedBitFrom); + } + /// The interface to read sample profiles from the associated file. std::error_code read() { if (std::error_code EC = readImpl()) @@ -505,6 +526,9 @@ protected: /// Number of context-sensitive profiles. uint32_t CSProfileCount = 0; + /// Whether the function profiles use FS discriminators. + bool ProfileIsFS = false; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; @@ -512,6 +536,10 @@ protected: /// is used by compiler. If SampleProfileReader is used by other /// tools which are not compiler, M is usually nullptr. const Module *M = nullptr; + + /// Zero out the discriminator bits higher than bit MaskedBitFrom (0 based). + /// The default is to keep all the bits. + uint32_t MaskedBitFrom = 31; }; class SampleProfileReaderText : public SampleProfileReader { diff --git a/llvm/include/llvm/Support/Discriminator.h b/llvm/include/llvm/Support/Discriminator.h index 3521a19..92191f7 100644 --- a/llvm/include/llvm/Support/Discriminator.h +++ b/llvm/include/llvm/Support/Discriminator.h @@ -13,6 +13,9 @@ #ifndef LLVM_SUPPORT_DISCRIMINATOR_H #define LLVM_SUPPORT_DISCRIMINATOR_H +#include "llvm/Support/Error.h" +#include + // Utility functions for encoding / decoding discriminators. /// With a given unsigned int \p U, use up to 13 bits to represent it. 
/// old_bit 1~5 --> new_bit 1~5 @@ -48,26 +51,83 @@ static inline unsigned encodingBits(unsigned C) { } // Some constants used in FS Discriminators. -#define BASE_DIS_BIT_BEG 0 -#define BASE_DIS_BIT_END 7 +// +namespace llvm { +namespace sampleprof { +enum class FSDiscriminatorPass : unsigned { + Base = 0, + Pass0 = 0, + Pass1 = 1, + Pass2 = 2, + Pass3 = 3, + Pass4 = 4, + PassLast = 4, +}; +} // namespace sampleprof + +using namespace sampleprof; + +// The number of bits reserved for the base discrimininator. The base +// discriminaitor starts from bit 0. +static const unsigned BaseDiscriminatorBitWidth = 8; + +// The number of bits reserved for each FS discriminator pass. +static const unsigned FSDiscriminatorBitWidth = 6; + +// Return the number of FS passes, excluding the pass adding the base +// discriminators. +// The number of passes for FS discriminators. Note that the total +// number of discriminaitor bits, i.e. +// BaseDiscriminatorBitWidth +// + FSDiscriminatorBitWidth * getNumFSPasses() +// needs to fit in an unsigned int type. +static inline unsigned getNumFSPasses() { + return static_cast(FSDiscriminatorPass::PassLast); +} + +// Return the ending bit for FSPass P. +static inline unsigned getFSPassBitEnd(FSDiscriminatorPass P) { + unsigned I = static_cast(P); + assert(I <= getNumFSPasses() && "Invalid FS discriminator pass number."); + return BaseDiscriminatorBitWidth + I * FSDiscriminatorBitWidth - 1; +} + +// Return the begining bit for FSPass P. +static inline unsigned getFSPassBitBegin(FSDiscriminatorPass P) { + if (P == FSDiscriminatorPass::Base) + return 0; + unsigned I = static_cast(P); + assert(I <= getNumFSPasses() && "Invalid FS discriminator pass number."); + return getFSPassBitEnd(static_cast(I - 1)) + 1; +} -#define PASS_1_DIS_BIT_BEG 8 -#define PASS_1_DIS_BIT_END 13 +// Return the beginning bit for the last FSPass. 
+static inline int getLastFSPassBitBegin() { + return getFSPassBitBegin(static_cast(getNumFSPasses())); +} -#define PASS_2_DIS_BIT_BEG 14 -#define PASS_2_DIS_BIT_END 19 +// Return the ending bit for the last FSPass. +static inline unsigned getLastFSPassBitEnd() { + return getFSPassBitEnd(static_cast(getNumFSPasses())); +} -#define PASS_3_DIS_BIT_BEG 20 -#define PASS_3_DIS_BIT_END 25 +// Return the beginning bit for the base (first) FSPass. +static inline unsigned getBaseFSBitBegin() { return 0; } -#define PASS_LAST_DIS_BIT_BEG 26 -#define PASS_LAST_DIS_BIT_END 31 +// Return the ending bit for the base (first) FSPass. +static inline unsigned getBaseFSBitEnd() { + return BaseDiscriminatorBitWidth - 1; +} -// Set bits range [0 .. n] to 1. Used in FS Discriminators. +// Set bits in range of [0 .. n] to 1. Used in FS Discriminators. static inline unsigned getN1Bits(int N) { - if (N >= 31) + // Work around the g++ bug that folding "(1U << (N + 1)) - 1" to 0. + if (N == 31) return 0xFFFFFFFF; - return (1 << (N + 1)) - 1; + assert((N < 32) && "N is invalid"); + return (1U << (N + 1)) - 1; } +} // namespace llvm + #endif /* LLVM_SUPPORT_DISCRIMINATOR_H */ diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp index d240def..b8d7d30 100644 --- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -22,6 +22,7 @@ #include using namespace llvm; +using namespace sampleprof; #define DEBUG_TYPE "mirfs-discriminators" @@ -33,9 +34,8 @@ INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE, char &llvm::MIRAddFSDiscriminatorsID = MIRAddFSDiscriminators::ID; -FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(unsigned LowBit, - unsigned HighBit) { - return new MIRAddFSDiscriminators(LowBit, HighBit); +FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) { + return new MIRAddFSDiscriminators(P); } // Compute a hash value using debug line number, and the line numbers from the diff --git 
a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 2a82091..2d8ecd5 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1177,8 +1177,12 @@ void TargetPassConfig::addMachinePasses() { addPass(&PatchableFunctionID); if (EnableFSDiscriminator && !FSNoFinalDiscrim) - addPass(createMIRAddFSDiscriminatorsPass(PASS_LAST_DIS_BIT_BEG, - PASS_LAST_DIS_BIT_END)); + // Add FS discriminators here so that all the instruction duplicates + // in different BBs get their own discriminators. With this, we can "sum" + // the SampleFDO counters instead of using MAX. This will improve the + // SampleFDO profile quality. + addPass(createMIRAddFSDiscriminatorsPass( + sampleprof::FSDiscriminatorPass::PassLast)); addPreEmitPass(); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index 565d67e..52aeafc 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -42,6 +42,7 @@ bool FunctionSamples::ProfileIsProbeBased = false; bool FunctionSamples::ProfileIsCS = false; bool FunctionSamples::UseMD5 = false; bool FunctionSamples::HasUniqSuffix = true; +bool FunctionSamples::ProfileIsFS = false; } // namespace sampleprof } // namespace llvm @@ -232,9 +233,15 @@ const FunctionSamples *FunctionSamples::findFunctionSamples( const DILocation *PrevDIL = DIL; for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { - S.push_back(std::make_pair( - LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), - PrevDIL->getScope()->getSubprogram()->getLinkageName())); + unsigned Discriminator; + if (ProfileIsFS) + Discriminator = DIL->getDiscriminator(); + else + Discriminator = DIL->getBaseDiscriminator(); + + S.push_back( + std::make_pair(LineLocation(getOffset(DIL), Discriminator), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); PrevDIL = DIL; } if (S.size() == 0) diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp 
b/llvm/lib/ProfileData/SampleProfReader.cpp index 656638d..d59c2e8 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" @@ -45,6 +46,15 @@ using namespace llvm; using namespace sampleprof; +#define DEBUG_TYPE "samplepgo-reader" + +// This internal option specifies if the profile uses FS discriminators. +// It only applies to text, binary and compact binary format profiles. +// For ext-binary format profiles, the flag is set in the summary. +static cl::opt ProfileIsFSDisciminator( + "profile-isfs", cl::Hidden, cl::init(false), + cl::desc("Profile uses flow senstive discriminators")); + /// Dump the function profile for \p FName. /// /// \param FName Name of the function to print. @@ -238,6 +248,7 @@ std::error_code SampleProfileReaderText::readImpl() { // top-level function profile. bool SeenMetadata = false; + ProfileIsFS = ProfileIsFSDisciminator; for (; !LineIt.is_at_eof(); ++LineIt) { if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') continue; @@ -295,6 +306,10 @@ std::error_code SampleProfileReaderText::readImpl() { "Found non-metadata after metadata: " + *LineIt); return sampleprof_error::malformed; } + + // Here we handle FS discriminators. 
+ Discriminator &= getDiscriminatorMask(); + while (InlineStack.size() > Depth) { InlineStack.pop_back(); } @@ -504,6 +519,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { if (std::error_code EC = NumCalls.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); + for (uint32_t J = 0; J < *NumCalls; ++J) { auto CalledFunction(readStringFromTable()); if (std::error_code EC = CalledFunction.getError()) @@ -513,11 +531,11 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { if (std::error_code EC = CalledFunctionSamples.getError()) return EC; - FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, *CalledFunction, *CalledFunctionSamples); } - FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); } // Read all the samples for inlined function calls. 
@@ -538,8 +556,11 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { if (std::error_code EC = FName.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( - LineLocation(*LineOffset, *Discriminator))[std::string(*FName)]; + LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)]; CalleeProfile.setName(*FName); if (std::error_code EC = readProfile(CalleeProfile)) return EC; @@ -575,6 +596,7 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { } std::error_code SampleProfileReaderBinary::readImpl() { + ProfileIsFS = ProfileIsFSDisciminator; while (!at_eof()) { if (std::error_code EC = readFuncProfile(Data)) return EC; @@ -595,6 +617,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( Summary->setPartialProfile(true); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) FunctionSamples::ProfileIsCS = ProfileIsCS = true; + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) + FunctionSamples::ProfileIsFS = ProfileIsFS = true; break; case SecNameTable: { FixedLengthMD5 = @@ -860,7 +884,7 @@ std::error_code SampleProfileReaderCompactBinary::readImpl() { // Collect functions used by current module if the Reader has been // given a module. bool LoadFuncsToBeUsed = collectFuncsFromModule(); - + ProfileIsFS = ProfileIsFSDisciminator; std::vector OffsetsToUse; if (!LoadFuncsToBeUsed) { // load all the function profiles. 
@@ -1105,6 +1129,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { Flags.append("partial,"); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) Flags.append("context,"); + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) + Flags.append("fs-discriminator,"); break; default: break; @@ -1521,6 +1547,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile( /// This format is generated by the Linux Perf conversion tool at /// https://github.com/google/autofdo. std::error_code SampleProfileReaderGCC::readImpl() { + assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); // Read the string table. if (std::error_code EC = readNameTable()) return EC; diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 73565f2..535f879 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -252,6 +252,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection( addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext); if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCS) addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute); + if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) + addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); uint64_t SectionStart = markSectionStart(Type, LayoutIdx); switch (Type) { diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 004e6fa..1fb02a0 100644 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -167,6 +167,7 @@ bool X86InsertPrefetch::doInitialization(Module &M) { return false; } Reader = std::move(ReaderOrErr.get()); + Reader->setBaseDiscriminatorMask(); Reader->read(); return true; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp 
index e48ed0b..9558741 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1769,6 +1769,7 @@ bool SampleProfileLoader::doInitialization(Module &M, // set module before reading the profile so reader may be able to only // read the function profiles which are used by the current module. Reader->setModule(&M); + Reader->setBaseDiscriminatorMask(); if (std::error_code EC = Reader->read()) { std::string Msg = "profile reading failed: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.extbinary.afdo new file mode 100644 index 0000000..e7c6fdb Binary files /dev/null and b/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.extbinary.afdo differ diff --git a/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.prof b/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.prof new file mode 100644 index 0000000..2afe584 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.prof @@ -0,0 +1,35 @@ +work:33383580:1068858 + 1: 981870 + 5: 981870 +foo:22388581:3449 + 0: 3449 + 2.1: 204820 + 4: 213086 bar:205247 + 4.2013265920: 222893 bar:218378 + 4.2281701376: 214552 bar:217479 + 4.2550136832: 210692 bar:220056 + 5: 213086 + 5.1207959552: 210692 + 5.1610612736: 202301 + 5.2952790016: 222893 + 6: 4780 + 6.268435456: 202301 work:198259 + 6.1073741824: 222893 work:231680 + 6.2147483648: 4780 + 7: 219065 + 7.134217728: 217053 + 7.2013265920: 183304 + 7.3758096384: 222101 + 8: 4780 + 8.2818572288: 222101 work:238765 + 8.3489660928: 183304 work:181615 + 8.4160749568: 217053 work:218539 + 10: 3281 +bar:7622325:861160 + 2: 846925 + 3: 846925 +main:16419:0 + 0: 0 + 2.1: 3280 + 3: 3299 foo:3449 + 5: 0 diff --git a/llvm/test/Transforms/SampleProfile/fsafdo_test.ll b/llvm/test/Transforms/SampleProfile/fsafdo_test.ll new file mode 100644 index 0000000..eb836f7 --- /dev/null +++ 
b/llvm/test/Transforms/SampleProfile/fsafdo_test.ll @@ -0,0 +1,230 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fsafdo.extbinary.afdo | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -sample-profile -profile-isfs -sample-profile-file=%S/Inputs/fsafdo.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +@sum = dso_local local_unnamed_addr global i32 0, align 4 + +declare i32 @bar(i32 %i) #0 +declare void @work(i32 %i) #2 + +define dso_local void @foo() #0 !dbg !29 { +; CHECK: Printing analysis {{.*}} for function 'foo': + +entry: + br label %for.cond1.preheader, !dbg !30 +; CHECK: edge entry -> for.cond1.preheader probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +for.cond1.preheader: + %j.012 = phi i32 [ 0, %entry ], [ %inc11, %if.end9.3 ] + %mul = mul nuw nsw i32 %j.012, 48 + %call = tail call i32 @bar(i32 %mul), !dbg !32 + %0 = and i32 %call, 1, !dbg !33 + %tobool.not = icmp eq i32 %0, 0, !dbg !33 + br i1 %tobool.not, label %if.end, label %if.then, !dbg !35 +; CHECK: edge for.cond1.preheader -> if.end probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge for.cond1.preheader -> if.then probability is 0x409d9d48 / 0x80000000 = 50.48% + + +if.then: + %mul4 = shl nsw i32 %call, 1, !dbg !36 + tail call void @work(i32 %mul4), !dbg !37 + br label %if.end, !dbg !38 +; CHECK: edge if.then -> if.end probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end: + %1 = and i32 %call, 3, !dbg !39 + %tobool6.not = icmp eq i32 %1, 0, !dbg !39 + br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !40 +; CHECK: edge if.end -> if.end9 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end -> if.then7 probability is 0x5d39453d / 0x80000000 = 72.83% + + +if.then7: + %mul8 = mul nsw i32 %call, 3, !dbg !41 + tail call void @work(i32 %mul8), !dbg !42 + br label %if.end9, !dbg !43 +; CHECK: edge if.then7 -> if.end9 probability 
is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9: + %add.1 = or i32 %mul, 1, !dbg !44 + %call.1 = tail call i32 @bar(i32 %add.1), !dbg !32 + %2 = and i32 %call.1, 1, !dbg !33 + %tobool.not.1 = icmp eq i32 %2, 0, !dbg !33 + br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !35 +; CHECK: edge if.end9 -> if.end.1 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9 -> if.then.1 probability is 0x409d9d48 / 0x80000000 = 50.48% + +for.end12: + ret void, !dbg !45 + +if.then.1: + %mul4.1 = shl nsw i32 %call.1, 1, !dbg !36 + tail call void @work(i32 %mul4.1), !dbg !37 + br label %if.end.1, !dbg !38 +; CHECK: edge if.then.1 -> if.end.1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.1: + %3 = and i32 %call.1, 3, !dbg !39 + %tobool6.not.1 = icmp eq i32 %3, 0, !dbg !39 + br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !40 +; CHECK: edge if.end.1 -> if.end9.1 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.1 -> if.then7.1 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.1: + %mul8.1 = mul nsw i32 %call.1, 3, !dbg !41 + tail call void @work(i32 %mul8.1), !dbg !42 + br label %if.end9.1, !dbg !43 +; CHECK: edge if.then7.1 -> if.end9.1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.1: + %add.2 = or i32 %mul, 2, !dbg !44 + %call.2 = tail call i32 @bar(i32 %add.2), !dbg !32 + %4 = and i32 %call.2, 1, !dbg !33 + %tobool.not.2 = icmp eq i32 %4, 0, !dbg !33 + br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !35 +; CHECK: edge if.end9.1 -> if.end.2 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9.1 -> if.then.2 probability is 0x409d9d48 / 0x80000000 = 50.48% + +if.then.2: + %mul4.2 = shl nsw i32 %call.2, 1, !dbg !36 + tail call void @work(i32 %mul4.2), !dbg !37 + br label %if.end.2, !dbg !38 +; CHECK: edge if.then.2 -> if.end.2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.2: + %5 = and i32 %call.2, 
3, !dbg !39 + %tobool6.not.2 = icmp eq i32 %5, 0, !dbg !39 + br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !40 +; CHECK: edge if.end.2 -> if.end9.2 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.2 -> if.then7.2 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.2: + %mul8.2 = mul nsw i32 %call.2, 3, !dbg !41 + tail call void @work(i32 %mul8.2), !dbg !42 + br label %if.end9.2, !dbg !43 +; CHECK: edge if.then7.2 -> if.end9.2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.2: + %add.3 = or i32 %mul, 3, !dbg !44 + %call.3 = tail call i32 @bar(i32 %add.3), !dbg !32 + %6 = and i32 %call.3, 1, !dbg !33 + %tobool.not.3 = icmp eq i32 %6, 0, !dbg !33 + br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !35 +; CHECK: edge if.end9.2 -> if.end.3 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9.2 -> if.then.3 probability is 0x409d9d48 / 0x80000000 = 50.48% + +if.then.3: + %mul4.3 = shl nsw i32 %call.3, 1, !dbg !36 + tail call void @work(i32 %mul4.3), !dbg !37 + br label %if.end.3, !dbg !38 +; CHECK: edge if.then.3 -> if.end.3 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.3: + %7 = and i32 %call.3, 3, !dbg !39 + %tobool6.not.3 = icmp eq i32 %7, 0, !dbg !39 + br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !40 +; CHECK: edge if.end.3 -> if.end9.3 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.3 -> if.then7.3 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.3: + %mul8.3 = mul nsw i32 %call.3, 3, !dbg !41 + tail call void @work(i32 %mul8.3), !dbg !42 + br label %if.end9.3, !dbg !43 +; CHECK: edge if.then7.3 -> if.end9.3 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.3: + %inc11 = add nuw nsw i32 %j.012, 1, !dbg !46 + %exitcond.not = icmp eq i32 %inc11, 48, !dbg !48 + br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !30, !llvm.loop !49 +; CHECK: edge if.end9.3 
-> for.end12 probability is 0x00834dd9 / 0x80000000 = 0.40% +; CHECK: edge if.end9.3 -> for.cond1.preheader probability is 0x7f7cb227 / 0x80000000 = 99.60% [HOT edge] +} + +define dso_local i32 @main() #3 !dbg !52 { +entry: + br label %for.body, !dbg !53 + +for.body: + %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + tail call void @foo(), !dbg !55 + %inc = add nuw nsw i32 %i.03, 1, !dbg !56 + %exitcond.not = icmp eq i32 %inc, 10000000, !dbg !58 + br i1 %exitcond.not, label %for.end, label %for.body, !dbg !53, !llvm.loop !60 + +for.end: + ret i32 0, !dbg !63 +} + + +attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile"} +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "unroll.c", directory: "a/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 4, column: 3, scope: !7) +!10 = !DILocation(line: 5, column: 5, scope: !7) +!11 = !{!12, !12, i64 0} +!12 = !{!"int", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 6, column: 10, scope: !7) +!16 = !DILocation(line: 7, column: 1, scope: !7) +!17 = !DILocation(line: 6, column: 3, scope: !18) +!18 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1) +!19 = distinct !DISubprogram(name: "work", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!20 = !DILocation(line: 11, column: 7, scope: !19) +!21 = !DILocation(line: 11, column: 11, scope: !22) +!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 1) +!23 = !DILocation(line: 11, column: 11, scope: !24) +!24 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 2) +!25 = !DILocation(line: 11, column: 7, scope: !26) +!26 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 3) +!27 = !DILocation(line: 0, scope: !22) +!28 = !DILocation(line: 
15, column: 1, scope: !19) +!29 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !8, scopeLine: 17, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!30 = !DILocation(line: 19, column: 3, scope: !31) +!31 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 2) +!32 = !DILocation(line: 21, column: 16, scope: !31) +!33 = !DILocation(line: 22, column: 14, scope: !34) +!34 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 1) +!35 = !DILocation(line: 22, column: 11, scope: !31) +!36 = !DILocation(line: 23, column: 16, scope: !29) +!37 = !DILocation(line: 23, column: 9, scope: !34) +!38 = !DILocation(line: 23, column: 9, scope: !31) +!39 = !DILocation(line: 24, column: 14, scope: !34) +!40 = !DILocation(line: 24, column: 11, scope: !31) +!41 = !DILocation(line: 25, column: 16, scope: !29) +!42 = !DILocation(line: 25, column: 9, scope: !34) +!43 = !DILocation(line: 25, column: 9, scope: !31) +!44 = !DILocation(line: 21, column: 21, scope: !34) +!45 = !DILocation(line: 27, column: 1, scope: !29) +!46 = !DILocation(line: 19, column: 24, scope: !47) +!47 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 3) +!48 = !DILocation(line: 19, column: 17, scope: !34) +!49 = distinct !{!49, !50, !51} +!50 = !DILocation(line: 19, column: 3, scope: !29) +!51 = !DILocation(line: 26, column: 3, scope: !29) +!52 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 29, type: !8, scopeLine: 29, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!53 = !DILocation(line: 31, column: 3, scope: !54) +!54 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 2) +!55 = !DILocation(line: 32, column: 5, scope: !52) +!56 = !DILocation(line: 31, column: 30, scope: !57) +!57 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 3) +!58 = !DILocation(line: 31, column: 17, scope: !59) +!59 = 
!DILexicalBlockFile(scope: !52, file: !1, discriminator: 1) +!60 = distinct !{!60, !61, !62} +!61 = !DILocation(line: 31, column: 3, scope: !52) +!62 = !DILocation(line: 33, column: 3, scope: !52) +!63 = !DILocation(line: 34, column: 1, scope: !52) diff --git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp index ddb25a9..fef64d9 100644 --- a/llvm/unittests/ProfileData/SampleProfTest.cpp +++ b/llvm/unittests/ProfileData/SampleProfTest.cpp @@ -9,6 +9,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -61,6 +62,7 @@ struct SampleProfTest : ::testing::Test { ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); Reader->setModule(&M); + Reader->setBaseDiscriminatorMask(); } TempFile createRemapFile() {