Previously, the profile staleness report only counted the samples of the top-level functions in a nested profile; the samples in the inlinees were ignored. This could hurt the quality of the metrics when functions are heavily inlined. This change adds a feature to flatten a nested profile, and switches stale profile detection and matching to use the flattened profile as input.
Example for profile flattening:
```
Original profile:
_Z3bazi:20301:1000
1: 1000
3: 2000
5: inline1:1600
1: 600
3: inline2:500
1: 500
Flattened profile:
_Z3bazi:18701:1000
1: 1000
3: 2000
5: 600 inline1:600
inline1:1100:600
1: 600
3: 500 inline2:500
inline2:500:500
1: 500
```
This feature is also useful for offline analysis, such as understanding the hotness of each individual function, so this change adds support for it to `llvm-profdata merge` under `--convert-sample-profile-layout=flat`.
Reviewed By: hoy, wenlei
Differential Revision: https://reviews.llvm.org/D146452
coverage for the optimized target. This option can only be used with
sample-based profile in extbinary format.
+.. option:: --convert-sample-profile-layout=[nest|flat]
+
+ Convert the merged profile into a profile with a new layout. Supported
+ layouts are ``nest`` (nested profile; the input should be a CS flat profile)
+ and ``flat`` (profile with nested inlinees flattened out).
+
.. option:: --supplement-instr-with-sample=<file>
Supplement an instrumentation profile with sample profile. The sample profile
SPF_Binary = 0xff
};
+/// Target layout for sample profile conversion.
+enum SampleProfileLayout {
+  /// No layout selected.
+  SPL_None = 0,
+  /// Nested profile.
+  SPL_Nest = 1,
+  /// Profile with nested inlinees flattened out.
+  SPL_Flat = 2,
+};
+
static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
+ /// Overwrite the accumulated head sample count with \p Num.
+ void setHeadSamples(uint64_t Num) {
+   TotalHeadSamples = Num;
+ }
+
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
bool Overflowed;
TotalHeadSamples =
return CallsiteSamples;
}
+ /// Return a mutable reference to the callsite sample map, allowing callers
+ /// to modify or clear the inlinee profiles in place.
+ CallsiteSampleMap &getCallsiteSamples() {
+   return CallsiteSamples;
+ }
+
/// Return the maximum of sample counts in a function body. When SkipCallSite
/// is false, which is the default, the return count includes samples in the
/// inlined functions. When SkipCallSite is true, the return count only
SampleProfileMap &ProfileMap;
};
-// CSProfileConverter converts a full context-sensitive flat sample profile into
-// a nested context-sensitive sample profile.
-class CSProfileConverter {
+/// Helper class for profile conversion.
+///
+/// It supports full context-sensitive profile to nested profile conversion,
+/// nested profile to flatten profile conversion, etc.
+class ProfileConverter {
public:
- CSProfileConverter(SampleProfileMap &Profiles);
- void convertProfiles();
+ ProfileConverter(SampleProfileMap &Profiles);
+ // Convert a full context-sensitive flat sample profile into a nested sample
+ // profile.
+ void convertCSProfiles();
struct FrameNode {
FrameNode(StringRef FName = StringRef(),
FunctionSamples *FSamples = nullptr,
StringRef CalleeName);
};
+ /// Flatten \p ProfileMap in place.
+ ///
+ /// The flattened result is built in a scratch map and then moved over
+ /// \p ProfileMap, so the input is never modified while it is being read.
+ /// \p ProfileIsCS is forwarded unchanged to the two-map overload.
+ static void flattenProfile(SampleProfileMap &ProfileMap,
+                            bool ProfileIsCS = false) {
+   SampleProfileMap Flattened;
+   flattenProfile(ProfileMap, Flattened, ProfileIsCS);
+   ProfileMap = std::move(Flattened);
+ }
+
+ /// Flatten every profile in \p InputProfiles into \p OutputProfiles.
+ ///
+ /// When \p ProfileIsCS is false, each input profile is flattened through
+ /// flattenNestedProfile. When it is true, input profiles are merged into the
+ /// output entry selected by their function name, and each output profile's
+ /// context is then reset from its map key.
+ static void flattenProfile(const SampleProfileMap &InputProfiles,
+                            SampleProfileMap &OutputProfiles,
+                            bool ProfileIsCS = false) {
+   if (!ProfileIsCS) {
+     for (const auto &Entry : InputProfiles)
+       flattenNestedProfile(OutputProfiles, Entry.second);
+     return;
+   }
+
+   // Merge all profiles that share a function name into a single entry.
+   for (const auto &Entry : InputProfiles) {
+     const FunctionSamples &CSProfile = Entry.second;
+     OutputProfiles[CSProfile.getName()].merge(CSProfile);
+   }
+
+   // Retain the profile name and clear the full context for each function
+   // profile.
+   for (auto &Entry : OutputProfiles)
+     Entry.second.setContext(SampleContext(Entry.first));
+ }
+
private:
+ /// Flatten \p FS and, recursively, every inlinee profile nested inside it,
+ /// accumulating the results into \p OutputProfiles.
+ ///
+ /// The output entry is looked up by the context of \p FS. If none exists yet,
+ /// a copy of \p FS (minus its inlinees) is inserted; otherwise the body
+ /// samples of \p FS, including their call targets, are merged into the
+ /// existing entry. Each inlinee is replaced at its call site by a body sample
+ /// and a call-target entry, both weighted by the inlinee's estimated head
+ /// samples, and is then flattened as a profile of its own.
+ static void flattenNestedProfile(SampleProfileMap &OutputProfiles,
+                                  const FunctionSamples &FS) {
+   // To retain the context, checksum, attributes of the original profile, make
+   // a copy of it if no profile is found.
+   const SampleContext &Context = FS.getContext();
+   auto Ret = OutputProfiles.emplace(Context, FS);
+   FunctionSamples &Profile = Ret.first->second;
+   if (Ret.second) {
+     // When it's the copy of the old profile, just clear all the inlinees'
+     // samples.
+     Profile.getCallsiteSamples().clear();
+     // We recompute TotalSamples later, so here set to zero.
+     Profile.setTotalSamples(0);
+   } else {
+     // A profile for this function already exists. Merge the body sample
+     // counts and the call targets recorded on each body line, so that the
+     // flattened call-target counts do not depend on which copy of the
+     // function happened to be visited first.
+     for (const auto &Line : FS.getBodySamples()) {
+       Profile.addBodySamples(Line.first.LineOffset, Line.first.Discriminator,
+                              Line.second.getSamples());
+       for (const auto &Target : Line.second.getCallTargets())
+         Profile.addCalledTargetSamples(Line.first.LineOffset,
+                                        Line.first.Discriminator,
+                                        Target.first(), Target.second);
+     }
+   }
+
+   assert(Profile.getCallsiteSamples().empty() &&
+          "There should be no inlinees' profiles after flattening.");
+
+   // TotalSamples might not be equal to the sum of all samples from
+   // BodySamples and CallsiteSamples. So here we use "TotalSamples =
+   // Original_TotalSamples - All_of_Callsite_TotalSamples +
+   // All_of_Callsite_HeadSamples" to compute the new TotalSamples.
+   uint64_t TotalSamples = FS.getTotalSamples();
+
+   for (const auto &I : FS.getCallsiteSamples()) {
+     for (const auto &Callee : I.second) {
+       const auto &CalleeProfile = Callee.second;
+       // Add body sample.
+       Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator,
+                              CalleeProfile.getHeadSamplesEstimate());
+       // Add callsite sample.
+       Profile.addCalledTargetSamples(
+           I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(),
+           CalleeProfile.getHeadSamplesEstimate());
+       // Update total samples. Clamp at zero so an inlinee whose total exceeds
+       // the running total cannot wrap the unsigned counter.
+       TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples()
+                          ? TotalSamples - CalleeProfile.getTotalSamples()
+                          : 0;
+       TotalSamples += CalleeProfile.getHeadSamplesEstimate();
+       // Recursively convert callee profile.
+       flattenNestedProfile(OutputProfiles, CalleeProfile);
+     }
+   }
+   Profile.addTotalSamples(TotalSamples);
+
+   // Refresh the stored head samples from the estimate computed on the
+   // flattened profile.
+   Profile.setHeadSamples(Profile.getHeadSamplesEstimate());
+ }
+
// Nest all children profiles into the profile of Node.
- void convertProfiles(FrameNode &Node);
+ void convertCSProfiles(FrameNode &Node);
FrameNode *getOrCreateContextPath(const SampleContext &Context);
SampleProfileMap &ProfileMap;
OS << Sym << "\n";
}
-CSProfileConverter::FrameNode *
-CSProfileConverter::FrameNode::getOrCreateChildFrame(
- const LineLocation &CallSite, StringRef CalleeName) {
+ProfileConverter::FrameNode *
+ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite,
+ StringRef CalleeName) {
uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
auto It = AllChildFrames.find(Hash);
if (It != AllChildFrames.end()) {
return &AllChildFrames[Hash];
}
-CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles)
+ProfileConverter::ProfileConverter(SampleProfileMap &Profiles)
: ProfileMap(Profiles) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
}
}
-CSProfileConverter::FrameNode *
-CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
+ProfileConverter::FrameNode *
+ProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
auto Node = &RootFrame;
LineLocation CallSiteLoc(0, 0);
for (auto &Callsite : Context.getContextFrames()) {
return Node;
}
-void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
+void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
// Process each child profile. Add each child profile to callsite profile map
// of the current node `Node` if `Node` comes with a profile. Otherwise
// promote the child profile to a standalone profile.
auto *NodeProfile = Node.FuncSamples;
for (auto &It : Node.AllChildFrames) {
auto &ChildNode = It.second;
- convertProfiles(ChildNode);
+ convertCSProfiles(ChildNode);
auto *ChildProfile = ChildNode.FuncSamples;
if (!ChildProfile)
continue;
}
}
-void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); }
+void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); }
cl::desc("Compute stale profile statistical metrics and write it into the "
"native object file(.llvm_stats section)."));
+// Hidden switch, on by default, selecting whether stale profile detection and
+// matching run on the flattened profile.
+static cl::opt<bool> FlattenProfileForMatching(
+    "flatten-profile-for-matching", cl::Hidden, cl::init(true),
+    cl::desc("Use flattened profile for stale profile detection and matching."));
+
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
Module &M;
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
+ SampleProfileMap FlattenedProfiles;
// Profile mismatching statstics.
uint64_t TotalProfiledCallsites = 0;
public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
const PseudoProbeManager *ProbeManager)
- : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
+ : M(M), Reader(Reader), ProbeManager(ProbeManager) {
+ if (FlattenProfileForMatching) {
+ ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+ FunctionSamples::ProfileIsCS);
+ }
+ }
+
+ /// Look up the flattened profile for \p F by its canonical function name.
+ /// Returns nullptr when FlattenedProfiles has no entry for that name.
+ FunctionSamples *getFlattenedSamplesFor(const Function &F) {
+   StringRef Name = FunctionSamples::getCanonicalFnName(F);
+   auto Found = FlattenedProfiles.find(Name);
+   return Found == FlattenedProfiles.end() ? nullptr : &Found->second;
+ }
+
void detectProfileMismatch();
void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
};
for (auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- FunctionSamples *FS = Reader.getSamplesFor(F);
+ FunctionSamples *FS = nullptr;
+ if (FlattenProfileForMatching)
+ FS = getFlattenedSamplesFor(F);
+ else
+ FS = Reader.getSamplesFor(F);
if (!FS)
continue;
detectProfileMismatch(F, *FS);
--- /dev/null
+[main]:30:0
+ 0: 0
+ 1.1: 0
+ 3: 10 matched:10
+ 4: 10
+ 5: 10 bar_mismatch:10
+ 7: 5 foo:5
+ 8: 0
+[main:7 @ foo]:15:5
+ 1: 5
+ 2: 5
+ 3: 5 inlinee_mismatch:5
+[bar]:10:10
+ 1: 10
+[matched]:10:10
+ 1: 10
+[main:7 @ foo:3 @ inlinee_mismatch]:5:5
+ 1: 5
4: 10
5: 10 bar_mismatch:10
8: 0
- 7: foo:10
+ 7: foo:15
1: 5
2: 5
+ 3: inlinee_mismatch:5
+ 1: 5
bar:10:10
1: 10
matched:10:10
; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
-; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile %S/Inputs/profile-context-tracker.prof -o %t.prof
+; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE
-; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof
+; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE
--- /dev/null
+; REQUIRES: x86_64-linux
+; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll
+; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
+
+; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll
+; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
+
+
+; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
; REQUIRES: x86_64-linux
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll
; RUN: FileCheck %s --input-file %t
; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
; RUN: llc < %t.ll -filetype=obj -o %t.obj
--- /dev/null
+[baz]:150:10
+ 1: 10
+ 3: 20
+ 5: 20 foo:20
+[foo]:102:1
+ 1: 1
+ 3: 1
+[main]:91:1
+ 4: 1
+ 4.2: 1
+ 7: 1
+ 9: 3 bar:2 foo:1
+ 10: 3 baz:2 foo:1
+[main:10 @ foo]:2:1
+ 3: 1 bar:1
+ 4: 1
+[bar]:1:1
+ 1: 1
+[main:10 @ foo:3 @ bar]:1:1
+ 1: 1
--- /dev/null
+baz:160:10
+ 1: 10
+ 3: 20
+ 5: foo:30
+ 1: 20
+ 3: bar:10
+ 1: 10
+ !CFGChecksum: 4
+ !Attributes: 4
+ !CFGChecksum: 3
+ !Attributes: 3
+ !CFGChecksum: 1
+ !Attributes: 1
+main:110:1
+ 4: 1
+ 4.2: 1
+ 7: 1
+ 9: 3 bar:2 foo:1
+ 10: foo:2
+ 4: 1
+ 3: bar:1
+ 1: 1
+ !CFGChecksum: 4
+ !Attributes: 4
+ !CFGChecksum: 3
+ !Attributes: 3
+ 10: baz:20
+ 10: 1
+ 6: bar:3
+ 1: 2
+ 7: 1
+ !CFGChecksum: 4
+ !Attributes: 4
+ !CFGChecksum: 2
+ !Attributes: 2
+foo:102:1
+ 1: 1
+ 3: 1
+ !CFGChecksum: 3
+ !Attributes: 3
+bar:1:1
+ 1: 1
+ !CFGChecksum: 4
+ !Attributes: 4
-RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0
+RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0
RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace
-RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0
+RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0
RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE
-RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0
+RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0
RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin
RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace
RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE
-RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
+RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1
RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
-RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
+RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1
RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY
RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST
RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST
--- /dev/null
+; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace
+
+; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile-cs.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile-cs.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS
+
+; CHECK:baz:169:10
+; CHECK-NEXT: 1: 10
+; CHECK-NEXT: 3: 20
+; CHECK-NEXT: 5: 20 foo:20
+; CHECK-NEXT: 6: 2 bar:2
+; CHECK-NEXT: 10: 1
+; CHECK-NEXT: !CFGChecksum: 1
+; CHECK-NEXT: !Attributes: 1
+; CHECK-NEXT:foo:134:21
+; CHECK-NEXT: 1: 21
+; CHECK-NEXT: 3: 12 bar:11
+; CHECK-NEXT: 4: 1
+; CHECK-NEXT: !CFGChecksum: 3
+; CHECK-NEXT: !Attributes: 3
+; CHECK-NEXT:main:91:1
+; CHECK-NEXT: 4: 1
+; CHECK-NEXT: 4.2: 1
+; CHECK-NEXT: 7: 1
+; CHECK-NEXT: 9: 3 bar:2 foo:1
+; CHECK-NEXT: 10: 3 baz:2 foo:1
+; CHECK-NEXT: !CFGChecksum: 2
+; CHECK-NEXT: !Attributes: 2
+; CHECK-NEXT:bar:15:14
+; CHECK-NEXT: 1: 14
+; CHECK-NEXT: 7: 1
+; CHECK-NEXT: !CFGChecksum: 4
+; CHECK-NEXT: !Attributes: 4
+
+; CHECK-CS:baz:150:10
+; CHECK-CS-NEXT: 1: 10
+; CHECK-CS-NEXT: 3: 20
+; CHECK-CS-NEXT: 5: 20 foo:20
+; CHECK-CS-NEXT:foo:104:2
+; CHECK-CS-NEXT: 1: 1
+; CHECK-CS-NEXT: 3: 2 bar:1
+; CHECK-CS-NEXT: 4: 1
+; CHECK-CS-NEXT:main:91:1
+; CHECK-CS-NEXT: 4: 1
+; CHECK-CS-NEXT: 4.2: 1
+; CHECK-CS-NEXT: 7: 1
+; CHECK-CS-NEXT: 9: 3 bar:2 foo:1
+; CHECK-CS-NEXT: 10: 3 baz:2 foo:1
+; CHECK-CS-NEXT:bar:2:2
+; CHECK-CS-NEXT: 1: 2
mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
StringRef OutputFilename, ProfileFormat OutputFormat,
StringRef ProfileSymbolListFile, bool CompressAllSections,
- bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile,
+ bool UseMD5, bool GenPartialProfile,
+ SampleProfileLayout ProfileLayout,
bool SampleMergeColdContext, bool SampleTrimColdContext,
bool SampleColdContextFrameDepth, FailureMode FailMode,
bool DropProfileSymbolList, size_t OutputSizeLimit) {
SampleMergeColdContext, SampleColdContextFrameDepth, false);
}
- if (ProfileIsCS && GenCSNestedProfile) {
- CSProfileConverter CSConverter(ProfileMap);
- CSConverter.convertProfiles();
+ if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
+ ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
+ ProfileIsCS = FunctionSamples::ProfileIsCS = false;
+ } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
+ ProfileConverter CSConverter(ProfileMap);
+ CSConverter.convertCSProfiles();
ProfileIsCS = FunctionSamples::ProfileIsCS = false;
}
"instr-prof-cold-threshold", cl::init(0), cl::Hidden,
cl::desc("User specified cold threshold for instr profile which will "
"override the cold threshold got from profile summary. "));
- cl::opt<bool> GenCSNestedProfile(
- "gen-cs-nested-profile", cl::Hidden, cl::init(false),
- cl::desc("Generate nested function profiles for CSSPGO"));
+ cl::opt<SampleProfileLayout> ProfileLayout(
+ "convert-sample-profile-layout",
+ cl::desc("Convert the generated profile to a profile with a new layout"),
+ cl::init(SPL_None),
+ cl::values(
+ clEnumValN(SPL_Nest, "nest",
+ "Nested profile, the input should be CS flat profile"),
+ clEnumValN(SPL_Flat, "flat",
+ "Profile with nested inlinee flatten out")));
cl::opt<std::string> DebugInfoFilename(
"debug-info", cl::init(""),
cl::desc("Use the provided debug info to correlate the raw profile."));
OutputFilename, OutputFormat, OutputSparse, NumThreads,
FailureMode, ProfiledBinary);
else
- mergeSampleProfile(
- WeightedInputs, Remapper.get(), OutputFilename, OutputFormat,
- ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile,
- GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext,
- SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList,
- OutputSizeLimit);
+ mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
+ OutputFormat, ProfileSymbolListFile, CompressAllSections,
+ UseMD5, GenPartialProfile, ProfileLayout,
+ SampleMergeColdContext, SampleTrimColdContext,
+ SampleColdContextFrameDepth, FailureMode,
+ DropProfileSymbolList, OutputSizeLimit);
return 0;
}
calculateAndShowDensity(ContextLessProfiles);
if (GenCSNestedProfile) {
- CSProfileConverter CSConverter(ProfileMap);
- CSConverter.convertProfiles();
+ ProfileConverter CSConverter(ProfileMap);
+ CSConverter.convertCSProfiles();
FunctionSamples::ProfileIsCS = false;
}
}