/// Accessor for the total count of samples collected inside this function.
uint64_t getTotalSamples() const {
  return TotalSamples;
}
- /// Return the total number of branch samples that have the function as the
- /// branch target. This should be equivalent to the sample of the first
- /// instruction of the symbol. But as we directly get this info for raw
- /// profile without referring to potentially inaccurate debug info, this
+ /// For top-level functions, return the total number of branch samples that
+ /// have the function as the branch target (or 0 otherwise). This is the raw
+ /// data fetched from the profile. This should be equivalent to the sample of
+ /// the first instruction of the symbol. But as we directly get this info for
+ /// raw profile without referring to potentially inaccurate debug info, this
/// gives more accurate profile data and is preferred for standalone symbols.
uint64_t getHeadSamples() const {
  // The stored head-sample counter is returned as-is; nothing is estimated.
  return TotalHeadSamples;
}
- /// Return the sample count of the first instruction of the function.
+ /// Return an estimate of the sample count of the function entry basic block.
/// The function can be either a standalone symbol or an inlined function.
- uint64_t getEntrySamples() const {
+ /// For Context-Sensitive profiles, this will prefer returning the head
+ /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from
+ /// the function body's samples or callsite samples.
+ uint64_t getHeadSamplesEstimate() const {
if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
// For CS profile, if we already have more accurate head samples
// counted by branch sample from caller, use them as entry samples.
// An indirect callsite may be promoted to several inlined direct calls.
// We need to get the sum of them.
for (const auto &N_FS : CallsiteSamples.begin()->second)
- Count += N_FS.second.getEntrySamples();
+ Count += N_FS.second.getHeadSamplesEstimate();
}
// Return at least 1 if total sample is not 0.
return Count ? Count : TotalSamples > 0;
if (!CalleeSamples || !CallerSamples) {
Weight = 0;
} else {
- uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
+ uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate();
uint64_t CallsiteCount = 0;
LineLocation Callsite = Callee->getCallSiteLoc();
if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
for (const auto &InlinedSamples : CallsiteSamples.second) {
addProfiledFunction(InlinedSamples.first);
addProfiledCall(Samples.getFuncName(), InlinedSamples.first,
- InlinedSamples.second.getEntrySamples());
+ InlinedSamples.second.getHeadSamplesEstimate());
addProfiledCalls(InlinedSamples.second);
}
}
// Comparator over FunctionSamples pointers (both must be non-null): the
// entry with the larger entry/head-sample count sorts first.
auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
assert(L && R && "Expect non-null FunctionSamples");
- if (L->getEntrySamples() != R->getEntrySamples())
- return L->getEntrySamples() > R->getEntrySamples();
+ if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
+ return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
// Counts are equal: fall back to ascending GUID of the function name.
return FunctionSamples::getGUID(L->getName()) <
FunctionSamples::getGUID(R->getName());
};
// as that already includes both inlined callee and non-inlined ones.
Sum = 0;
for (const auto *const FS : CalleeSamples) {
- Sum += FS->getEntrySamples();
+ Sum += FS->getHeadSamplesEstimate();
R.push_back(FS);
}
llvm::sort(R, FSCompare);
if (M->empty())
return R;
for (const auto &NameFS : *M) {
- Sum += NameFS.second.getEntrySamples();
+ Sum += NameFS.second.getHeadSamplesEstimate();
R.push_back(&NameFS.second);
}
llvm::sort(R, FSCompare);
bool PreInline =
UsePreInlinerDecision &&
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
- if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
+ if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
StringRef Name = CalleeSample->getFuncName();
assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
"GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS)
+ if (FS->getHeadSamplesEstimate() > 0 ||
+ FunctionSamples::ProfileIsCS)
LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
Hot = true;
if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
continue;
- Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+ Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
Factor = Probe->Factor;
uint64_t CallsiteCount =
- CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0;
+ CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
*NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
return true;
}
continue;
}
uint64_t EntryCountDistributed =
- FS->getEntrySamples() * Candidate.CallsiteDistribution;
+ FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
// In addition to regular inline cost check, we also need to make sure
// ICP isn't introducing excessive speculative checks even if individual
// target looks beneficial to promote and inline. That means we should
++NumCSNotInlined;
const FunctionSamples *FS = Pair.getSecond();
- if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
+ if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
continue;
}
// Use entry samples as head samples during the merge, as inlinees
// don't have head samples.
const_cast<FunctionSamples *>(FS)->addHeadSamples(
- FS->getEntrySamples());
+ FS->getHeadSamplesEstimate());
// Note that we have to do the merge right after processing function.
// This allows OutlineFS's profile to be used for annotation during
} else {
auto pair =
notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
- pair.first->second.entryCount += FS->getEntrySamples();
+ pair.first->second.entryCount += FS->getHeadSamplesEstimate();
}
}
}
if (const FunctionSamplesMap *M =
FS->findFunctionSamplesMapAt(CallSite)) {
for (const auto &NameFS : *M)
- Sum += NameFS.second.getEntrySamples();
+ Sum += NameFS.second.getHeadSamplesEstimate();
}
}
if (Sum)
(ProfileTotalSample > 0)
? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
: 0;
- PrintValues.emplace_back(HotFuncInfo(
- Func.getContext().toString(), Func.getTotalSamples(),
- TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples()));
+ PrintValues.emplace_back(
+ HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
+ TotalSamplePercent, FuncPair.second.second,
+ Func.getHeadSamplesEstimate()));
}
dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
Profiles.size(), HotFuncSample, ProfileTotalSample,
// Call site count is more reliable, so we look up the corresponding call
// target profile in caller's context profile to retrieve call site count.
- uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
+ uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate();
uint64_t CallsiteCount = 0;
LineLocation Callsite = CalleeNode->getCallSiteLoc();
if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
// Since we don't have call count for inlined functions, we
// estimate it from inlinee's profile using entry body sample.
- uint64_t EstimatedCallCount = CalleeProfile->getEntrySamples();
+ uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
// If we don't have samples with location, use 1 to indicate live.
if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
EstimatedCallCount = 1;