coverage for the optimized target. This option can only be used with
sample-based profile in extbinary format.
+.. option:: -supplement-instr-with-sample=path_to_sample_profile
+
+ Supplement an instrumentation profile with sample profile. The sample profile
+ is the input of the flag. Output will be in instrumentation format (only works
+ with -instr).
+
+.. option:: -zero-counter-threshold=threshold_float_number
+
+ For the function which is cold in instr profile but hot in sample profile, if
+ the ratio of the number of zero counters divided by the total number of
+ counters is above the threshold, the profile of the function will be regarded
+ as being harmful for performance and will be dropped.
+
+.. option:: -instr-prof-cold-threshold=threshold_int_number
+
+ User specified cold threshold for instr profile which will override the cold
+ threshold got from profile summary.
+
+.. option:: -suppl-min-size-threshold=threshold_int_number
+
+ If the size of a function is smaller than the threshold, assume it can be
+ inlined by PGO early inliner and it will not be adjusted based on sample
+ profile.
+
EXAMPLES
^^^^^^^^
Basic Usage
/// Optionally scale merged counts by \p Weight.
void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
function_ref<void(instrprof_error)> Warn);
- /// Scale up value profile data counts.
- void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
+ /// Scale up value profile data counts by N (Numerator) / D (Denominator).
+ void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
/// Compute the overlap b/w this record and Input record.
void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
function_ref<void(instrprof_error)> Warn);
/// Scale up profile counts (including value profile data) by
- /// \p Weight.
- void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
+ /// a factor of (N / D).
+ void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
/// Sort value profile data (per site) by count.
void sortValueData() {
uint64_t Weight,
function_ref<void(instrprof_error)> Warn);
- // Scale up value profile data count.
- void scaleValueProfData(uint32_t ValueKind, uint64_t Weight,
+ // Scale up value profile data count by N (Numerator) / D (Denominator).
+ void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn);
};
InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false);
~InstrProfWriter();
+ StringMap<ProfilingData> &getProfileData() { return FunctionData; }
+
/// Add function counts for the given function. If there are already counts
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.
}
}
-void InstrProfValueSiteRecord::scale(uint64_t Weight,
+// Scale every value-profile count at this site by N / D. The multiplication
+// saturates (and reports counter_overflow through Warn) before the division.
+void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
+ // D is a divisor below; mirror the check done in InstrProfRecord::scale so
+ // that direct callers of this method are covered as well.
+ assert(D != 0 && "D cannot be 0");
for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
bool Overflowed;
- I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed);
+ I->Count = SaturatingMultiply(I->Count, N, &Overflowed) / D;
if (Overflowed)
Warn(instrprof_error::counter_overflow);
}
}
void InstrProfRecord::scaleValueProfData(
- uint32_t ValueKind, uint64_t Weight,
+ uint32_t ValueKind, uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
for (auto &R : getValueSitesForKind(ValueKind)) // every value site of ValueKind
- R.scale(Weight, Warn);
+ R.scale(N, D, Warn); // forward the N / D ratio and the Warn callback
}
-void InstrProfRecord::scale(uint64_t Weight,
+void InstrProfRecord::scale(uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
+ assert(D != 0 && "D cannot be 0"); // D is used as a divisor below
for (auto &Count : this->Counts) {
bool Overflowed;
- Count = SaturatingMultiply(Count, Weight, &Overflowed);
+ Count = SaturatingMultiply(Count, N, &Overflowed) / D; // saturating multiply first, then divide
if (Overflowed)
Warn(instrprof_error::counter_overflow); // reported via the callback; scaling continues
}
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
- scaleValueProfData(Kind, Weight, Warn);
+ scaleValueProfData(Kind, N, D, Warn); // same ratio for value profile data of every kind
}
// Map indirect call target name hash to name string.
// We've never seen a function with this name and hash, add it.
Dest = std::move(I);
if (Weight > 1)
- Dest.scale(Weight, MapWarn);
+ Dest.scale(Weight, 1, MapWarn);
} else {
// We're updating a function we've seen before.
Dest.merge(I, Weight, MapWarn);
}
void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
- addCount(Count);
NumFunctions++;
+
+ // Skip invalid count ((uint64_t)-1). The function has already been counted
+ // in NumFunctions above, but the value must not reach addCount() or
+ // MaxFunctionCount.
+ if (Count == (uint64_t)-1)
+ return;
+
+ addCount(Count);
if (Count > MaxFunctionCount)
MaxFunctionCount = Count;
}
void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
+ // Skip invalid count.
+ if (Count == (uint64_t)-1)
+ return;
+
addCount(Count);
if (Count > MaxInternalBlockCount)
MaxInternalBlockCount = Count;
FreqAttr(FFA_Normal), IsCS(IsCS) {}
// Read counts for the instrumented BB from profile.
- bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
+ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
+ bool &AllMinusOnes);
// Populate the counts for all BBs.
void populateCounters();
// Read the profile from ProfileFileName and assign the value to the
// instrumented BB and the edges. This function also updates ProgramMaxCount.
// Return true if the profile are successfully read, and false on errors.
-bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
+ bool &AllMinusOnes) {
auto &Ctx = M->getContext();
Expected<InstrProfRecord> Result =
PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+ AllMinusOnes = (CountFromProfile.size() > 0);
uint64_t ValueSum = 0;
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
ValueSum += CountFromProfile[I];
+ if (CountFromProfile[I] != (uint64_t)-1)
+ AllMinusOnes = false;
}
AllZeros = (ValueSum == 0);
SplitIndirectBrCriticalEdges(F, BPI, BFI);
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
InstrumentFuncEntry);
+ // When AllMinusOnes is true, it means the profile for the function
+ // is unrepresentative and this function is actually hot. Set the
+ // entry count of the function to be multiple times of hot threshold
+ // and drop all its internal counters.
+ bool AllMinusOnes = false;
bool AllZeros = false;
- if (!Func.readCounters(PGOReader.get(), AllZeros))
+ if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
continue;
if (AllZeros) {
F.setEntryCount(ProfileCount(0, Function::PCT_Real));
ColdFunctions.push_back(&F);
continue;
}
+ const unsigned MultiplyFactor = 3;
+ if (AllMinusOnes) {
+ uint64_t HotThreshold = PSI->getHotCountThreshold();
+ if (HotThreshold)
+ F.setEntryCount(
+ ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
+ HotFunctions.push_back(&F);
+ continue;
+ }
Func.populateCounters();
Func.setBranchWeights();
Func.annotateValueSites();
--- /dev/null
+test_simple_for:4000:4000
+ 1: 1000
+ 2: 1000
+ 3: 1000
+ 4: 1000
+
+moo:10:10
+ 1: 2
+ 2: 2
+ 3: 2
+ 4: 2
+ 5: 2
--- /dev/null
+# :ir is the flag to indicate this is IR level profile.
+:ir
+test_simple_for
+34137660316
+2
+0
+0
+
+foo
+2582734
+4
+1000
+270
+180
+760
--- /dev/null
+; Supplement instr profile suppl-profile.proftext with sample profile
+; sample-profile.proftext.
+; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \
+; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile.proftext \
+; RUN: %S/Inputs/suppl-profile.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check test_simple_for has a non-zero entry count and doesn't have any other
+; prof metadata.
+; CHECK: @test_simple_for(i32 %n) {{.*}} !prof ![[ENTRY_COUNT:[0-9]+]]
+; CHECK-NOT: !prof !
+; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 540}
+define i32 @test_simple_for(i32 %n) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+ %sum = phi i32 [ 1, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %inc = add nsw i32 %sum, 1
+ br label %for.inc
+
+for.inc:
+ %inc1 = add nsw i32 %i, 1
+ br label %for.cond
+
+for.end:
+ ret i32 %sum
+}
--- /dev/null
+:ir
+foo
+7
+5
+12
+13
+0
+0
+0
+
+goo
+5
+3
+0
+0
+0
+
+moo
+9
+4
+3000
+1000
+2000
+500
+
--- /dev/null
+foo:2000:2000
+ 1: 2000
+goo:3000:1500
+ 1: 1200
+ 2: 800
+ 3: 1000
+moo:1000:1000
+ 1: 1000
+hoo:50:1
+ 1: 1
+ 2: 2
+ 3: 3
+ 4: 4
+ 5: 5
+ 6: 6
+ 7: 7
+ 8: 8
1- Merge profile having maximum counts with itself and verify overflow detected and saturation occurred
RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW
-RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
+RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
MERGE_OVERFLOW: {{.*}}: overflow: Counter overflow
-SHOW_OVERFLOW: Total functions: 1
-SHOW_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
-SHOW_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
+SHOW_OVERFLOW: Function count: 18446744073709551615
+SHOW_OVERFLOW-NEXT: Block counts: [18446744073709551615, 18446744073709551615]
2- Merge profile having maximum counts by itself and verify no overflow
RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty
-RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
+RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
MERGE_NO_OVERFLOW-NOT: {{.*}}: overflow: Counter overflow
-SHOW_NO_OVERFLOW: Total functions: 1
-SHOW_NO_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
-SHOW_NO_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
+SHOW_NO_OVERFLOW: Function count: 18446744073709551615
+SHOW_NO_OVERFLOW-NEXT: Block counts: [9223372036854775808, 18446744073709551615]
--- /dev/null
+Some basic tests for supplementing instrumentation profile with sample profile.
+
+Test all of goo's counters will be set to -1.
+RUN: llvm-profdata merge \
+RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
+RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX1
+
+MIX1: foo:
+MIX1-NEXT: Hash: 0x0000000000000007
+MIX1-NEXT: Counters: 5
+MIX1-NEXT: Block counts: [12, 13, 0, 0, 0]
+MIX1: goo:
+MIX1-NEXT: Hash: 0x0000000000000005
+MIX1-NEXT: Counters: 3
+MIX1-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
+MIX1: moo:
+MIX1-NEXT: Hash: 0x0000000000000009
+MIX1-NEXT: Counters: 4
+MIX1-NEXT: Block counts: [3000, 1000, 2000, 500]
+
+Test when the zero counter ratio of foo is higher than zero-counter-threshold.
+RUN: llvm-profdata merge \
+RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
+RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.5 \
+RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX2
+
+MIX2: foo:
+MIX2-NEXT: Hash: 0x0000000000000007
+MIX2-NEXT: Counters: 5
+MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615]
+MIX2: goo:
+MIX2-NEXT: Hash: 0x0000000000000005
+MIX2-NEXT: Counters: 3
+MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
+MIX2: moo:
+MIX2-NEXT: Hash: 0x0000000000000009
+MIX2-NEXT: Counters: 4
+MIX2-NEXT: Block counts: [3000, 1000, 2000, 500]
+
+Test when the zero counter ratio of foo is lower than zero-counter-threshold.
+RUN: llvm-profdata merge \
+RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
+RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.7 \
+RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX3
+
+MIX3: foo:
+MIX3-NEXT: Hash: 0x0000000000000007
+MIX3-NEXT: Counters: 5
+MIX3-NEXT: Block counts: [1384, 1500, 0, 0, 0]
+MIX3: goo:
+MIX3-NEXT: Hash: 0x0000000000000005
+MIX3-NEXT: Counters: 3
+MIX3-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
+MIX3: moo:
+MIX3-NEXT: Hash: 0x0000000000000009
+MIX3-NEXT: Counters: 4
+MIX3-NEXT: Block counts: [3000, 1000, 2000, 500]
+
+Test foo's profile won't be adjusted because its size is smaller
+than suppl-min-size-threshold.
+RUN: llvm-profdata merge \
+RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
+RUN: -suppl-min-size-threshold=2 -zero-counter-threshold=0.7 \
+RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX4
+
+MIX4: foo:
+MIX4-NEXT: Hash: 0x0000000000000007
+MIX4-NEXT: Counters: 5
+MIX4-NEXT: Block counts: [12, 13, 0, 0, 0]
+MIX4: goo:
+MIX4-NEXT: Hash: 0x0000000000000005
+MIX4-NEXT: Counters: 3
+MIX4-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]
+MIX4: moo:
+MIX4-NEXT: Hash: 0x0000000000000009
+MIX4-NEXT: Counters: 4
+MIX4-NEXT: Block counts: [3000, 1000, 2000, 500]
+
+Test profile summary won't be affected by -1 counter.
+RUN: llvm-profdata merge \
+RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \
+RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t
+RUN: llvm-profdata show %t -detailed-summary | FileCheck %s --check-prefix=MIX5
+
+MIX5: Instrumentation level: IR
+MIX5-NEXT: Total functions: 3
+MIX5-NEXT: Maximum function count: 3000
+MIX5-NEXT: Maximum internal block count: 2000
+MIX5-NEXT: Total number of blocks: 9
+MIX5-NEXT: Total count: 6525
+MIX5-NEXT: Detailed summary:
+MIX5-NEXT: 3 blocks with count >= 1000 account for 80 percentage of the total counts.
+MIX5-NEXT: 3 blocks with count >= 1000 account for 90 percentage of the total counts.
+MIX5-NEXT: 4 blocks with count >= 500 account for 95 percentage of the total counts.
+MIX5-NEXT: 4 blocks with count >= 500 account for 99 percentage of the total counts.
+MIX5-NEXT: 6 blocks with count >= 12 account for 99.9 percentage of the total counts.
+MIX5-NEXT: 6 blocks with count >= 12 account for 99.99 percentage of the total counts.
+MIX5-NEXT: 6 blocks with count >= 12 account for 99.999 percentage of the total counts.
writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
}
+/// The profile entry for a function in instrumentation profile.
+struct InstrProfileEntry {
+  /// Largest value found among the record's counters.
+  uint64_t MaxCount = 0;
+  /// Number of zero counters divided by the total number of counters.
+  float ZeroCounterRatio = 0.0;
+  /// The record this entry describes. Null for a default-constructed entry,
+  /// so that such an entry never carries an indeterminate pointer.
+  InstrProfRecord *ProfRecord = nullptr;
+  InstrProfileEntry(InstrProfRecord *Record);
+  InstrProfileEntry() = default;
+};
+
+/// Build the entry for \p Record: remember the record and compute the
+/// maximum counter value and the ratio of zero counters over all counters.
+InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record)
+    : ProfRecord(Record) {
+  const size_t CntNum = Record->Counts.size();
+  uint64_t ZeroCntNum = 0;
+  for (uint64_t Count : Record->Counts) {
+    MaxCount = std::max(MaxCount, Count);
+    ZeroCntNum += !Count;
+  }
+  // A record without counters would yield 0 / 0 (NaN); keep the default
+  // ratio of 0 in that case.
+  if (CntNum != 0)
+    ZeroCounterRatio = static_cast<float>(ZeroCntNum) / CntNum;
+}
+
+/// Adjust the instr profile entry \p IFE in one of two ways: either set all
+/// of its counters to -1 in order to drop the profile, or scale the counters
+/// up so that the maximum counter lands above the hot threshold. The ratio of
+/// zero counters in the function's profile decides whether the profile is
+/// helpful or harmful for performance, i.e. whether to scale it or drop it.
+static void updateInstrProfileEntry(InstrProfileEntry &IFE,
+                                    uint64_t HotInstrThreshold,
+                                    float ZeroCounterThreshold) {
+  InstrProfRecord *Record = IFE.ProfRecord;
+  const bool AllZero = IFE.MaxCount == 0;
+  const bool MostlyZero = IFE.ZeroCounterRatio > ZeroCounterThreshold;
+  if (AllZero || MostlyZero) {
+    // With all or most counters being zero the profile cannot be trusted
+    // and should be dropped. Every counter is reset to -1, which PGO
+    // profile-use takes as the signal to drop the profile. All counters
+    // being -1 also implies the function is hot, so PGO profile-use will
+    // set the entry count metadata above the hot threshold.
+    for (uint64_t &Counter : Record->Counts)
+      Counter = -1;
+    return;
+  }
+
+  // Scale so that MaxCount becomes a multiple of the hot threshold.
+  const unsigned MultiplyFactor = 3;
+  auto WarnOnError = [&](instrprof_error E) {
+    warn(toString(make_error<InstrProfError>(E)));
+  };
+  Record->scale(HotInstrThreshold * MultiplyFactor, IFE.MaxCount, WarnOnError);
+}
+
+const uint64_t ColdPercentileIdx = 15; // Index into ProfileSummaryBuilder::DefaultCutoffs used for the cold thresholds.
+const uint64_t HotPercentileIdx = 11; // Index into ProfileSummaryBuilder::DefaultCutoffs used for the hot threshold.
+
+/// Adjust the instr profile in \p WC based on the sample profile in
+/// \p Reader.
+///
+/// A function's instr profile is adjusted only when all of the following
+/// hold: it has exactly one record in the instr profile, its head samples in
+/// the sample profile exceed the sample cold threshold, its maximum instr
+/// counter is at most the instr cold threshold, and its number of body
+/// samples is at least \p SupplMinSizeThreshold. A non-zero
+/// \p InstrProfColdThreshold overrides the cold threshold computed from the
+/// instr profile summary. \p ZeroCounterThreshold is forwarded to
+/// updateInstrProfileEntry.
+static void
+adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
+                   std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
+                   unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
+                   unsigned InstrProfColdThreshold) {
+  // Function to its entry in instr profile.
+  StringMap<InstrProfileEntry> InstrProfileMap;
+  InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
+  for (auto &PD : WC->Writer.getProfileData()) {
+    // Populate IPBuilder. The stored record is passed directly instead of
+    // being copied into a temporary first.
+    for (const auto &PDV : PD.getValue())
+      IPBuilder.addRecord(PDV.second);
+
+    // If a function has multiple entries in instr profile, skip it.
+    if (PD.getValue().size() != 1)
+      continue;
+
+    // Initialize InstrProfileMap.
+    InstrProfRecord *R = &PD.getValue().begin()->second;
+    InstrProfileMap[PD.getKey()] = InstrProfileEntry(R);
+  }
+
+  ProfileSummary InstrPS = *IPBuilder.getSummary();
+  ProfileSummary SamplePS = Reader->getSummary();
+
+  // Compute cold thresholds for instr profile and sample profile.
+  uint64_t ColdSampleThreshold =
+      ProfileSummaryBuilder::getEntryForPercentile(
+          SamplePS.getDetailedSummary(),
+          ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
+          .MinCount;
+  uint64_t HotInstrThreshold =
+      ProfileSummaryBuilder::getEntryForPercentile(
+          InstrPS.getDetailedSummary(),
+          ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
+          .MinCount;
+  uint64_t ColdInstrThreshold =
+      InstrProfColdThreshold
+          ? InstrProfColdThreshold
+          : ProfileSummaryBuilder::getEntryForPercentile(
+                InstrPS.getDetailedSummary(),
+                ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
+                .MinCount;
+
+  // Find hot/warm functions in sample profile which is cold in instr profile
+  // and adjust the profiles of those functions in the instr profile.
+  for (const auto &PD : Reader->getProfiles()) {
+    StringRef FName = PD.getKey();
+    const sampleprof::FunctionSamples &FS = PD.getValue();
+    auto It = InstrProfileMap.find(FName);
+    if (FS.getHeadSamples() > ColdSampleThreshold &&
+        It != InstrProfileMap.end() &&
+        It->second.MaxCount <= ColdInstrThreshold &&
+        FS.getBodySamples().size() >= SupplMinSizeThreshold) {
+      updateInstrProfileEntry(It->second, HotInstrThreshold,
+                              ZeroCounterThreshold);
+    }
+  }
+}
+
+/// Entry point for supplementing an instr profile with a sample profile.
+/// \p Inputs holds the instr profile (exactly one input with weight 1 is
+/// accepted). \p SampleFilename names the sample profile. \p OutputFilename
+/// names the output profile and \p OutputFormat gives its format.
+/// \p OutputSparse selects whether a sparse profile is generated.
+/// \p SupplMinSizeThreshold is the minimal size of the functions whose
+/// profile will be adjusted. \p ZeroCounterThreshold is the threshold used
+/// to decide whether a function contains too many zero counters and its
+/// profile should be dropped. \p InstrProfColdThreshold is the user
+/// specified cold threshold that overrides the cold threshold taken from the
+/// instr profile summary.
+static void supplementInstrProfile(
+    const WeightedFileVector &Inputs, StringRef SampleFilename,
+    StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
+    unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
+    unsigned InstrProfColdThreshold) {
+  // Validate the command line before touching any file.
+  if (OutputFilename == "-")
+    exitWithError("Cannot write indexed profdata format to stdout.");
+  if (Inputs.size() != 1)
+    exitWithError("Expect one input to be an instr profile.");
+  const auto &InstrInput = Inputs[0];
+  if (InstrInput.Weight != 1)
+    exitWithError("Expect instr profile doesn't have weight.");
+
+  StringRef InstrFilename = InstrInput.Filename;
+
+  // Load the sample profile first.
+  LLVMContext Context;
+  auto ReaderOrErr =
+      sampleprof::SampleProfileReader::create(SampleFilename.str(), Context);
+  if (std::error_code EC = ReaderOrErr.getError())
+    exitWithErrorCode(EC, SampleFilename);
+  auto Reader = std::move(ReaderOrErr.get());
+  if (std::error_code EC = Reader->read())
+    exitWithErrorCode(EC, SampleFilename);
+
+  // Then load the instr profile into a fresh writer context.
+  std::mutex ErrorLock;
+  SmallSet<instrprof_error, 4> WriterErrorCodes;
+  auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
+                                            WriterErrorCodes);
+  loadInput(InstrInput, nullptr, WC.get());
+  if (!WC->Errors.empty())
+    exitWithError(std::move(WC->Errors[0].first), InstrFilename);
+
+  // Adjust the loaded instr profile and write the result.
+  adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
+                     InstrProfColdThreshold);
+  writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
+}
+
/// Make a copy of the given function samples with all symbol names remapped
/// by the provided symbol remapper.
static sampleprof::FunctionSamples
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
+ cl::opt<std::string> SupplInstrWithSample(
+ "supplement-instr-with-sample", cl::init(""), cl::Hidden,
+ cl::desc("Supplement an instr profile with sample profile, to correct "
+ "the profile unrepresentativeness issue. The sample "
+ "profile is the input of the flag. Output will be in instr "
+ "format (The flag only works with -instr)"));
+ cl::opt<float> ZeroCounterThreshold(
+ "zero-counter-threshold", cl::init(0.7), cl::Hidden,
+ cl::desc("For the function which is cold in instr profile but hot in "
+ "sample profile, if the ratio of the number of zero counters "
+ "divided by the the total number of counters is above the "
+ "threshold, the profile of the function will be regarded as "
+ "being harmful for performance and will be dropped. "));
+ cl::opt<unsigned> SupplMinSizeThreshold(
+ "suppl-min-size-threshold", cl::init(10), cl::Hidden,
+ cl::desc("If the size of a function is smaller than the threshold, "
+ "assume it can be inlined by PGO early inliner and it won't "
+ "be adjusted based on sample profile. "));
+ cl::opt<unsigned> InstrProfColdThreshold(
+ "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
+ cl::desc("User specified cold threshold for instr profile which will "
+ "override the cold threshold got from profile summary. "));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
if (!RemappingFile.empty())
Remapper = SymbolRemapper::create(RemappingFile);
+ if (!SupplInstrWithSample.empty()) {
+ if (ProfileKind != instr)
+ exitWithError(
+ "-supplement-instr-with-sample can only work with -instr. ");
+
+ supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
+ OutputFormat, OutputSparse, SupplMinSizeThreshold,
+ ZeroCounterThreshold, InstrProfColdThreshold);
+ return 0;
+ }
+
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, OutputSparse, NumThreads, FailureMode);
uint64_t FuncMax = 0;
uint64_t FuncSum = 0;
for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
+ if (Func.Counts[I] == (uint64_t)-1)
+ continue;
FuncMax = std::max(FuncMax, Func.Counts[I]);
FuncSum += Func.Counts[I];
}