/// Return the total number of samples collected inside the function.
uint64_t getTotalSamples() const { return TotalSamples; }
- /// Return the total number of samples collected at the head of the
- /// function.
+ /// Return the total number of branch samples that have the function as the
+ /// branch target. This should be equivalent to the sample count of the first
+ /// instruction of the symbol. But as we get this info directly from the raw
+ /// profile without referring to potentially inaccurate debug info, this
+ /// gives more accurate profile data and is preferred for standalone symbols.
uint64_t getHeadSamples() const { return TotalHeadSamples; }
+ /// Return the sample count of the first instruction of the function, or 0
+ /// if it has no samples. The function can be a standalone symbol or inlined.
+ uint64_t getEntrySamples() const {
+ // Use either BodySamples or CallsiteSamples, whichever has the smaller
+ // lineno.
+ if (!BodySamples.empty() &&
+ (CallsiteSamples.empty() ||
+ BodySamples.begin()->first < CallsiteSamples.begin()->first))
+ return BodySamples.begin()->second.getSamples();
+ if (!CallsiteSamples.empty()) {
+ uint64_t T = 0;
+ // An indirect callsite may be promoted to several inlined direct calls,
+ // so sum the entry samples of every callee recorded at that callsite.
+ for (const auto &N_FS : CallsiteSamples.begin()->second)
+ T += N_FS.second.getEntrySamples();
+ return T;
+ }
+ return 0;
+ }
+
/// Return all the samples collected in the body of the function.
const BodySampleMap &getBodySamples() const { return BodySamples; }
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
std::vector<const FunctionSamples *>
- findIndirectCallFunctionSamples(const Instruction &I) const;
+ findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
}
/// Returns a vector of FunctionSamples that are the indirect call targets
-/// of \p Inst. The vector is sorted by the total number of samples.
+/// of \p Inst. The vector is sorted by entry sample count, descending. Stores
+/// the total call count of the indirect call in \p Sum.
std::vector<const FunctionSamples *>
SampleProfileLoader::findIndirectCallFunctionSamples(
- const Instruction &Inst) const {
+ const Instruction &Inst, uint64_t &Sum) const {
const DILocation *DIL = Inst.getDebugLoc();
std::vector<const FunctionSamples *> R;
if (FS == nullptr)
return R;
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+
+ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ Sum = 0;
+ if (T)
+ for (const auto &T_C : T.get())
+ Sum += T_C.second;
if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
if (M->size() == 0)
return R;
for (const auto &NameFS : *M) {
+ Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
std::sort(R.begin(), R.end(),
[](const FunctionSamples *L, const FunctionSamples *R) {
- return L->getTotalSamples() > R->getTotalSamples();
+ return L->getEntrySamples() > R->getEntrySamples();
});
}
return R;
if (CallSite(I).isIndirectCall()) {
if (PromotedInsns.count(I))
continue;
- for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+ uint64_t Sum;
+ for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
if (IsThinLTOPreLink) {
FS->findImportedFunctions(ImportGUIDs, F.getParent(),
Samples->getTotalSamples() *
!R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
isLegalToPromote(I, R->getValue(), &Reason)) {
- // The indirect target was promoted and inlined in the profile,
- // as a result, we do not have profile info for the branch
- // probability. We set the probability to 80% taken to indicate
- // that the static call is likely taken.
+ uint64_t C = FS->getEntrySamples();
Instruction *DI = promoteIndirectCall(
- I, R->getValue(), 80, 100, false, ORE);
+ I, R->getValue(), C, Sum, false, ORE);
+ Sum -= C;
PromotedInsns.insert(I);
// If profile mismatches, we should not attempt to inline DI.
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
store i64* (i32*)* %0, i64* (i32*)** %2
%3 = load i64* (i32*)*, i64* (i32*)** %2
; CHECK: icmp {{.*}} @foo_inline2
+; CHECK: br {{.*}} !prof ![[BR1:[0-9]+]]
; CHECK: if.true.direct_targ:
; CHECK-NOT: call
; CHECK: if.false.orig_indirect:
; CHECK: icmp {{.*}} @foo_inline1
+; CHECK: br {{.*}} !prof ![[BR2:[0-9]+]]
; CHECK: if.true.direct_targ1:
; CHECK-NOT: call
; CHECK: if.false.orig_indirect2:
!4 = !DILocation(line: 4, scope: !3)
!5 = !DILocation(line: 6, scope: !3)
; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}
+; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000}
+; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000}
; CHECK: ![[VP]] = !{!"VP", i32 0, i64 1000, i64 -6391416044382067764, i64 1000}
!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0)
!7 = !DILocation(line: 7, scope: !6)