This change adds support for trimming and merging cold context when merging CSSPGO profiles using llvm-profdata. This is similar to the context profile trimming in llvm-profgen; however, the flexibility to trim cold context after the profile is generated can be useful.
Differential Revision: https://reviews.llvm.org/D100528
public:
ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); }
- ProfileSummaryInfo(std::unique_ptr<ProfileSummary> PSI)
- : M(nullptr), Summary(std::move(PSI)) {}
-
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
/// If no summary is present, attempt to refresh.
/// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &
getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile);
+ static uint64_t getHotCountThreshold(SummaryEntryVector &DS);
+ static uint64_t getColdCountThreshold(SummaryEntryVector &DS);
};
class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
#include "llvm/Support/CommandLine.h"
using namespace llvm;
-// The following two parameters determine the threshold for a count to be
-// considered hot/cold. These two parameters are percentile values (multiplied
-// by 10000). If the counts are sorted in descending order, the minimum count to
-// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
-// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
-// threshold for determining cold count (everything <= this threshold is
-// considered cold).
-
-static cl::opt<int> ProfileSummaryCutoffHot(
- "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
- cl::desc("A count is hot if it exceeds the minimum count to"
- " reach this percentile of total counts."));
-
-static cl::opt<int> ProfileSummaryCutoffCold(
- "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
- cl::desc("A count is cold if it is below the minimum count"
- " to reach this percentile of total counts."));
-
-static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
- "profile-summary-huge-working-set-size-threshold", cl::Hidden,
- cl::init(15000), cl::ZeroOrMore,
- cl::desc("The code working set size is considered huge if the number of"
- " blocks required to reach the -profile-summary-cutoff-hot"
- " percentile exceeds this count."));
-
-static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
- "profile-summary-large-working-set-size-threshold", cl::Hidden,
- cl::init(12500), cl::ZeroOrMore,
- cl::desc("The code working set size is considered large if the number of"
- " blocks required to reach the -profile-summary-cutoff-hot"
- " percentile exceeds this count."));
-
-// The next two options override the counts derived from summary computation and
-// are useful for debugging purposes.
-static cl::opt<int> ProfileSummaryHotCount(
- "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
- cl::desc("A fixed hot count that overrides the count derived from"
- " profile-summary-cutoff-hot"));
-
-static cl::opt<int> ProfileSummaryColdCount(
- "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
- cl::desc("A fixed cold count that overrides the count derived from"
- " profile-summary-cutoff-cold"));
+// Knobs for profile summary based thresholds.
+extern cl::opt<int> ProfileSummaryCutoffHot;
+extern cl::opt<int> ProfileSummaryCutoffCold;
+extern cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold;
+extern cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold;
+extern cl::opt<int> ProfileSummaryHotCount;
+extern cl::opt<int> ProfileSummaryColdCount;
static cl::opt<bool> PartialProfile(
"partial-profile", cl::Hidden, cl::init(false),
auto &DetailedSummary = Summary->getDetailedSummary();
auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile(
DetailedSummary, ProfileSummaryCutoffHot);
- HotCountThreshold = HotEntry.MinCount;
- if (ProfileSummaryHotCount.getNumOccurrences() > 0)
- HotCountThreshold = ProfileSummaryHotCount;
- auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
- DetailedSummary, ProfileSummaryCutoffCold);
- ColdCountThreshold = ColdEntry.MinCount;
- if (ProfileSummaryColdCount.getNumOccurrences() > 0)
- ColdCountThreshold = ProfileSummaryColdCount;
+ HotCountThreshold =
+ ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
+ ColdCountThreshold =
+ ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
assert(ColdCountThreshold <= HotCountThreshold &&
"Cold count threshold cannot exceed hot count threshold!");
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
"profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::desc("Merge context profiles before calculating thresholds."));
+// The following two parameters determine the threshold for a count to be
+// considered hot/cold. These two parameters are percentile values (multiplied
+// by 10000). If the counts are sorted in descending order, the minimum count to
+// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
+// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
+// threshold for determining cold count (everything <= this threshold is
+// considered cold).
+cl::opt<int> ProfileSummaryCutoffHot(
+ "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
+ cl::desc("A count is hot if it exceeds the minimum count to"
+ " reach this percentile of total counts."));
+
+cl::opt<int> ProfileSummaryCutoffCold(
+ "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
+ cl::desc("A count is cold if it is below the minimum count"
+ " to reach this percentile of total counts."));
+
+cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
+ "profile-summary-huge-working-set-size-threshold", cl::Hidden,
+ cl::init(15000), cl::ZeroOrMore,
+ cl::desc("The code working set size is considered huge if the number of"
+ " blocks required to reach the -profile-summary-cutoff-hot"
+ " percentile exceeds this count."));
+
+cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
+ "profile-summary-large-working-set-size-threshold", cl::Hidden,
+ cl::init(12500), cl::ZeroOrMore,
+ cl::desc("The code working set size is considered large if the number of"
+ " blocks required to reach the -profile-summary-cutoff-hot"
+ " percentile exceeds this count."));
+
+// The next two options override the counts derived from summary computation and
+// are useful for debugging purposes.
+cl::opt<int> ProfileSummaryHotCount(
+ "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed hot count that overrides the count derived from"
+ " profile-summary-cutoff-hot"));
+
+cl::opt<int> ProfileSummaryColdCount(
+ "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed cold count that overrides the count derived from"
+ " profile-summary-cutoff-cold"));
+
// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
// (which is 1000000) is a desired percentile of total counts.
static const uint32_t DefaultCutoffsData[] = {
}
}
+uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) { // Hot-count threshold for detailed summary DS.
+ auto &HotEntry =
+ ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot); // Entry at the -profile-summary-cutoff-hot percentile.
+ uint64_t HotCountThreshold = HotEntry.MinCount; // Default: minimum count of that entry.
+ if (ProfileSummaryHotCount.getNumOccurrences() > 0) // -profile-summary-hot-count was given on the command line.
+ HotCountThreshold = ProfileSummaryHotCount; // The fixed value overrides the summary-derived one.
+ return HotCountThreshold;
+}
+
+uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector &DS) { // Cold-count threshold for detailed summary DS.
+ auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
+ DS, ProfileSummaryCutoffCold); // Entry at the -profile-summary-cutoff-cold percentile.
+ uint64_t ColdCountThreshold = ColdEntry.MinCount; // Default: minimum count of that entry.
+ if (ProfileSummaryColdCount.getNumOccurrences() > 0) // -profile-summary-cold-count was given on the command line.
+ ColdCountThreshold = ProfileSummaryColdCount; // The fixed value overrides the summary-derived one.
+ return ColdCountThreshold;
+}
+
std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
computeDetailedSummary();
return std::make_unique<ProfileSummary>(
--- /dev/null
+RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext
+RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext
+
+RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -profile-summary-cold-count=500
+RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-MERGE
+RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cold-count=500
+RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END
+RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext -sample-merge-cold-context -sample-trim-cold-context -profile-summary-cutoff-cold=990000
+RUN: FileCheck %s --input-file %t.proftext --check-prefixes=CHECK-TRIM,CHECK-END
+
+CHECK-TRIM: [main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11
+CHECK-TRIM-NEXT: 0: 6
+CHECK-TRIM-NEXT: 1: 6
+CHECK-TRIM-NEXT: 3: 287884
+CHECK-TRIM-NEXT: 4: 287864 _Z3fibi:315608
+CHECK-TRIM-NEXT: 15: 23
+CHECK-TRIM-NEXT: !Attributes: 0
+CHECK-TRIM-NEXT: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
+CHECK-TRIM-NEXT: 0: 15
+CHECK-TRIM-NEXT: 1: 15
+CHECK-TRIM-NEXT: 3: 74946
+CHECK-TRIM-NEXT: 4: 74941 _Z3fibi:82359
+CHECK-TRIM-NEXT: 10: 23324
+CHECK-TRIM-NEXT: 11: 23327 _Z3fibi:25228
+CHECK-TRIM-NEXT: 15: 11
+CHECK-TRIM-NEXT: !Attributes: 1
+CHECK-END-NOT: [
+CHECK-MERGE: [_Z5funcBi]:360:32
+CHECK-MERGE-NEXT: 0: 32
+CHECK-MERGE-NEXT: 1: 32 _Z8funcLeafi:20
+CHECK-MERGE-NEXT: 3: 12
+CHECK-MERGE-NEXT: !Attributes: 0
+CHECK-MERGE-NEXT:[main]:308:12
+CHECK-MERGE-NEXT: 2: 24
+CHECK-MERGE-NEXT: 3: 28 _Z5funcAi:18
+CHECK-MERGE-NEXT: 3.1: 28 _Z5funcBi:30
+CHECK-MERGE-NEXT: !Attributes: 0
+CHECK-MERGE-NEXT:[_Z5funcAi]:99:11
+CHECK-MERGE-NEXT: 0: 10
+CHECK-MERGE-NEXT: 1: 10 _Z8funcLeafi:11
+CHECK-MERGE-NEXT: 3: 24
+CHECK-MERGE-NEXT: !Attributes: 0
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-PREINL
; Test preinliner threshold that prevents all possible inlining and merges everything into base profile.
-; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-hot-inline-threshold=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 -sample-profile-cold-inline-threshold=0
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
; CHECK-DEFAULT: [main:1 @ foo]:309:0
mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
StringRef OutputFilename, ProfileFormat OutputFormat,
StringRef ProfileSymbolListFile, bool CompressAllSections,
- bool UseMD5, bool GenPartialProfile, FailureMode FailMode) {
+ bool UseMD5, bool GenPartialProfile,
+ bool SampleMergeColdContext, bool SampleTrimColdContext,
+ FailureMode FailMode) {
using namespace sampleprof;
StringMap<FunctionSamples> ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
if (ReaderList)
WriterList.merge(*ReaderList);
}
+
+ if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
+ // Use threshold calculated from profile summary unless specified.
+ SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
+ auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
+ uint64_t SampleProfColdThreshold =
+ ProfileSummaryBuilder::getColdCountThreshold(
+ (Summary->getDetailedSummary()));
+
+ // Trim and merge cold context profile using cold threshold above.
+ SampleContextTrimmer(ProfileMap)
+ .trimAndMergeColdContextProfiles(SampleProfColdThreshold,
+ SampleTrimColdContext,
+ SampleMergeColdContext);
+ }
+
auto WriterOrErr =
SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
if (std::error_code EC = WriterOrErr.getError())
"use-md5", cl::init(false), cl::Hidden,
cl::desc("Choose to use MD5 to represent string in name table (only "
"meaningful for -extbinary)"));
+ cl::opt<bool> SampleMergeColdContext(
+ "sample-merge-cold-context", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Merge context sample profiles whose count is below cold threshold"));
+ cl::opt<bool> SampleTrimColdContext(
+ "sample-trim-cold-context", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Trim context sample profiles whose count is below cold threshold"));
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
else
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, ProfileSymbolListFile, CompressAllSections,
- UseMD5, GenPartialProfile, FailureMode);
+ UseMD5, GenPartialProfile, SampleMergeColdContext,
+ SampleTrimColdContext, FailureMode);
return 0;
}
// Run global pre-inliner to adjust/merge context profile based on estimated
// inline decisions.
- CSPreInliner(ProfileMap, PSI->getHotCountThreshold(),
- PSI->getColdCountThreshold())
- .run();
+ CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
// Trim and merge cold context profile using cold threshold above;
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
- CSProfColdThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
+ ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
}
void CSProfileGenerator::computeSummaryAndThreshold() {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
- PSI.reset(new ProfileSummaryInfo(std::move(Summary)));
+ HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
+ (Summary->getDetailedSummary()));
+ ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
+ (Summary->getDetailedSummary()));
// Use threshold calculated from profile summary unless specified.
- if (!CSProfColdThreshold.getNumOccurrences()) {
- CSProfColdThreshold = PSI->getColdCountThreshold();
+ if (CSProfColdThreshold.getNumOccurrences()) {
+ ColdCountThreshold = CSProfColdThreshold;
}
}
#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfiledBinary.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/ProfileData/SampleProfWriter.h"
#include <memory>
#include <unordered_set>
void write(std::unique_ptr<SampleProfileWriter> Writer,
StringMap<FunctionSamples> &ProfileMap) override;
- // Profile summary to answer isHotCount and isColdCount queries.
- std::unique_ptr<ProfileSummaryInfo> PSI;
+ // Thresholds from profile summary to answer isHotCount/isColdCount queries.
+ uint64_t HotCountThreshold;
+ uint64_t ColdCountThreshold;
// String table owning context strings created from profile generation.
std::unordered_set<std::string> ContextStrings;