"by inlining from sample profile loader."),
cl::Hidden);
+extern cl::opt<unsigned> MaxNumPromotions; // defined in another TU; sizes the value-profile buffers below
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
// Attempt to promote indirect call and also inline the promoted call
bool tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
- uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
- SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+ uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
return it.first->second;
}
+/// Report whether \p Candidate has already been promoted at the indirect
+/// call \p Inst.
+///
+/// The indirect call target value profile attached to \p Inst is read (at
+/// most MaxNumPromotions entries). An entry whose count is 0 is the marker
+/// this file stores for a target that has already been promoted.
+///
+/// \param Inst      Call instruction whose value profile is examined.
+/// \param Candidate Name of the prospective callee; it is matched against
+///                  the profile entries by its GUID.
+/// \returns true only if an entry for \p Candidate exists and its count is
+///          0. Returns false when no profile data could be read, when no
+///          entry matches, or when the matching entry has a non-zero count.
+static bool isPromotedBefore(const Instruction &Inst, StringRef Candidate) {
+  uint32_t NumVals = 0;
+  uint64_t TotalCount = 0;
+  std::unique_ptr<InstrProfValueData[]> ValueData =
+      std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
+  bool Valid =
+      getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
+                               ValueData.get(), NumVals, TotalCount, true);
+  if (!Valid)
+    return false;
+
+  // The GUID depends only on the candidate name, so compute it once rather
+  // than on every loop iteration.
+  const GlobalValue::GUID CandidateGUID = Function::getGUID(Candidate);
+  for (uint32_t I = 0; I < NumVals; I++) {
+    // If the promotion candidate has 0 count in the metadata, it
+    // means the candidate has been promoted for this indirect call.
+    if (ValueData[I].Value == CandidateGUID)
+      return ValueData[I].Count == 0;
+  }
+  return false;
+}
+
+/// Merge \p CallTargets into the indirect call target profile metadata of
+/// \p Inst and re-annotate it. If \p Total is non-zero after the adjustments
+/// below it becomes the recorded total; otherwise the existing total is kept.
+static void
+updateIDTMetaData(Instruction &Inst,
+ const SmallVectorImpl<InstrProfValueData> &CallTargets,
+ uint64_t Total = 0) {
+ DenseMap<uint64_t, uint64_t> ValueCountMap; // call target value -> count
+
+ uint32_t NumVals = 0;
+ uint64_t TotalCount = 0; // filled in by getValueProfDataFromInst below
+ std::unique_ptr<InstrProfValueData[]> ValueData =
+ std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
+ bool Valid =
+ getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
+ ValueData.get(), NumVals, TotalCount, true);
+ if (Valid) { // seed the map with the entries already attached to Inst
+ for (uint32_t I = 0; I < NumVals; I++)
+ ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
+ }
+
+ for (const auto &Data : CallTargets) {
+ auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
+ if (Pair.second) // target was not present and has just been added
+ continue;
+ // The target already had an entry: a non-zero entry takes the new count.
+ // A zero entry marks an already promoted target, so it is kept as 0.
+ if (Pair.first->second != 0)
+ Pair.first->second = Data.Count;
+ else { // promoted target: drop its count from the total to be recorded
+ assert(Total >= Data.Count && "Total should be >= Data.Count");
+ Total -= Data.Count; // NOTE(review): unsigned, wraps if the assert is compiled out and Total < Data.Count
+ }
+ }
+
+ SmallVector<InstrProfValueData, 8> NewCallTargets;
+ for (const auto &ValueCount : ValueCountMap) {
+ NewCallTargets.emplace_back(
+ InstrProfValueData{ValueCount.first, ValueCount.second});
+ }
+ llvm::sort(NewCallTargets, // descending count, ties broken by descending value
+ [](const InstrProfValueData &L, const InstrProfValueData &R) {
+ if (L.Count != R.Count)
+ return L.Count > R.Count;
+ return L.Value > R.Value;
+ });
+ annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
+ NewCallTargets, Total ? Total : TotalCount,
+ IPVK_IndirectCallTarget, NewCallTargets.size()); // every merged entry is written, zero-count ones included
+}
+
/// Attempt to promote indirect call and also inline the promoted call.
///
/// \param F Caller function.
/// \param Candidate ICP and inline candidate.
/// \param Sum Sum of target counts for indirect call.
-/// \param PromotedInsns Map to keep track of indirect call already processed.
/// \param InlinedCallSite Output vector for new call sites exposed after
/// inlining.
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
- DenseSet<Instruction *> &PromotedInsns,
SmallVector<CallBase *, 8> *InlinedCallSite) {
+ auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
+ auto R = SymbolMap.find(CalleeFunctionName);
+ if (R == SymbolMap.end() || !R->getValue())
+ return false;
+
+ auto &CI = *Candidate.CallInstr;
+ if (isPromotedBefore(CI, R->getValue()->getName()))
+ return false;
+
const char *Reason = "Callee function not available";
// R->getValue() != &F is to prevent promoting a recursive call.
// If it is a recursive call, we do not inline it as it could bloat
// clone the caller first, and inline the cloned caller if it is
// recursive. As llvm does not inline recursive calls, we will
// simply ignore it instead of handling it explicitly.
- auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
- if (R != SymbolMap.end() && R->getValue() &&
- !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
+ if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
R->getValue()->hasFnAttribute("use-sample-profile") &&
- R->getValue() != &F &&
- isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
- auto *DI =
- &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
- Candidate.CallsiteCount, Sum, false, ORE);
+ R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
+ // For promoted target, save 0 count in the value profile metadata so
+ // the target won't be promoted again.
+ SmallVector<InstrProfValueData, 1> SortedCallTargets = {
+ InstrProfValueData{Function::getGUID(R->getValue()->getName()), 0}};
+ updateIDTMetaData(CI, SortedCallTargets);
+
+ auto *DI = &pgo::promoteIndirectCall(
+ CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
if (DI) {
Sum -= Candidate.CallsiteCount;
// Prorate the indirect callsite distribution.
// profile will be used to prorate callsites from the callee if
// inlined. Once not inlined, the direct callsite distribution should
// be prorated so that the it will reflect the real callsite counts.
- setProbeDistributionFactor(*Candidate.CallInstr,
- Candidate.CallsiteDistribution * Sum /
- SumOrigin);
- PromotedInsns.insert(Candidate.CallInstr);
+ setProbeDistributionFactor(CI, Candidate.CallsiteDistribution * Sum /
+ SumOrigin);
Candidate.CallInstr = DI;
if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
/// \returns True if there is any inline happened.
bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
- DenseSet<Instruction *> PromotedInsns;
-
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
// Profile symbol list is ignored when profile-sample-accurate is on.
assert((!ProfAccForSymsInList ||
if (CalledFunction == &F)
continue;
if (I->isIndirectCall()) {
- if (PromotedInsns.count(I))
- continue;
uint64_t Sum;
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
uint64_t SumOrigin = Sum;
continue;
Candidate = {I, FS, FS->getEntrySamples(), 1.0};
- if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
- PromotedInsns)) {
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
}
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
- DenseSet<Instruction *> PromotedInsns;
assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
if (CalledFunction == &F)
continue;
if (I->isIndirectCall()) {
- if (PromotedInsns.count(I))
- continue;
uint64_t Sum;
auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
uint64_t SumOrigin = Sum;
Candidate = {I, FS, EntryCountDistributed,
Candidate.CallsiteDistribution};
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
- PromotedInsns, &InlinedCallSites)) {
+ &InlinedCallSites)) {
for (auto *CB : InlinedCallSites) {
if (getInlineCandidate(&NewCandidate, CB))
CQueue.emplace(NewCandidate);
Sum += NameFS.second.getEntrySamples();
}
}
- annotateValueSite(*I.getParent()->getParent()->getParent(), I,
- SortedCallTargets, Sum, IPVK_IndirectCallTarget,
- SortedCallTargets.size());
+ updateIDTMetaData(I, SortedCallTargets, Sum);
} else if (!isa<IntrinsicInst>(&I)) {
I.setMetadata(LLVMContext::MD_prof,
MDB.createBranchWeights(
--- /dev/null
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/norepeated-icp.prof -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [5 x i8] c"hoo\0A\00", align 1
+@p = dso_local global void ()* null, align 8
+@str = private unnamed_addr constant [4 x i8] c"hoo\00", align 1
+
+; hoo(): passes @str ("hoo") to puts and returns. Function Attrs: uwtable mustprogress
+define dso_local void @_Z3hoov() #0 !dbg !7 {
+entry:
+ %puts = call i32 @puts(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @str, i64 0, i64 0)), !dbg !9
+ ret void, !dbg !10
+}
+
+; Function Attrs: nofree nounwind
+declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1
+
+; goo(): loads the function pointer stored in @p and calls through it. Function Attrs: uwtable mustprogress
+define dso_local void @_Z3goov() #0 !dbg !11 {
+entry:
+ %0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13
+ call void %0(), !dbg !17 ; the indirect call site (debug location line 7, column 3)
+ ret void, !dbg !18
+}
+
+; Check the indirect call in _Z3goov inlined into _Z3foov won't be indirect
+; call promoted for _Z3hoov twice in _Z3foov.
+; CHECK-LABEL: @_Z3foov(
+; CHECK: icmp eq void ()* {{.*}} @_Z3hoov
+; CHECK-NOT: icmp eq void ()* {{.*}} @_Z3hoov
+; CHECK: ret void
+
+; foo(): makes a single direct call to goo(); the FileCheck lines above inspect this function. Function Attrs: uwtable mustprogress
+define dso_local void @_Z3foov() #0 !dbg !19 {
+entry:
+ call void @_Z3goov(), !dbg !20 ; direct call to goo(), which holds the indirect call
+ ret void, !dbg !21
+}
+
+; Function Attrs: nofree nounwind
+declare noundef i32 @puts(i8* nocapture noundef readonly) #2
+
+attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
+attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nofree nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "1.cc", directory: "")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!""}
+!7 = distinct !DISubprogram(name: "hoo", linkageName: "_Z3hoov", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 2, column: 3, scope: !7)
+!10 = !DILocation(line: 3, column: 1, scope: !7)
+!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!12 = !DILocation(line: 7, column: 5, scope: !11)
+!13 = !{!14, !14, i64 0}
+!14 = !{!"any pointer", !15, i64 0}
+!15 = !{!"omnipotent char", !16, i64 0}
+!16 = !{!"Simple C++ TBAA"}
+!17 = !DILocation(line: 7, column: 3, scope: !11)
+!18 = !DILocation(line: 8, column: 1, scope: !11)
+!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!20 = !DILocation(line: 11, column: 3, scope: !19)
+!21 = !DILocation(line: 12, column: 3, scope: !19)