From acfd0a345619c9293ac8fa5f690fa78b27a10814 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Thu, 12 May 2022 22:08:18 -0700 Subject: [PATCH] [llvm-profgen] Update callsite body samples by summing up all call target samples. Current profile generation calculates callsite body samples and call target samples separately. The former is done based on LBR range samples while the latter is done based on branch samples. Note that there's a subtle difference. LBR ranges are formed from two consecutive branch samples. Therefore the last entry in an LBR record will not be counted towards body samples while there's still a chance for it to be counted towards call targets if it is a function call. I'm making sense of the call body samples by updating them to the aggregation of call targets. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D122609 --- llvm/include/llvm/ProfileData/SampleProf.h | 20 ++++++++++ llvm/test/tools/llvm-profgen/inline-noprobe2.test | 4 +- llvm/test/tools/llvm-profgen/noinline-noprobe.test | 2 +- llvm/test/tools/llvm-profgen/update-samples.test | 45 ++++++++++++++++++++++ llvm/tools/llvm-profgen/ProfileGenerator.cpp | 23 ++++++++--- llvm/tools/llvm-profgen/ProfileGenerator.h | 5 +++ 6 files changed, 90 insertions(+), 9 deletions(-) create mode 100644 llvm/test/tools/llvm-profgen/update-samples.test diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index a39a4d8..8d8813e 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -387,6 +387,13 @@ public: return SortCallTargets(CallTargets); } + uint64_t getCallTargetSum() const { + uint64_t Sum = 0; + for (const auto &I : CallTargets) + Sum += I.second; + return Sum; + } + /// Sort call targets in descending order of call frequency.
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { SortedCallTargetSet SortedTargets; @@ -779,6 +786,19 @@ public: return BodySamples[LineLocation(Index, 0)].merge(S, Weight); } + // Accumulate all call target samples to update the body samples. + void updateCallsiteSamples() { + for (auto &I : BodySamples) { + uint64_t TargetSamples = I.second.getCallTargetSum(); + // It's possible that the body sample count can be greater than the call + // target sum. E.g, if some call targets are external targets, they won't + // be considered valid call targets, but the body sample count which is + // from lbr ranges can actually include them. + if (TargetSamples > I.second.getSamples()) + I.second.addSamples(TargetSamples - I.second.getSamples()); + } + } + // Accumulate all body samples to set total samples. void updateTotalSamples() { setTotalSamples(0); diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test index 4d41243..97c96f0 100644 --- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -59,7 +59,7 @@ ;CHECK: 6.1: 17 ;CHECK: 6.3: 17 ;CHECK: 7: 0 -;CHECK: 8: 0 quick_sort:1 +;CHECK: 8: 1 quick_sort:1 ;CHECK: 9: 0 ;CHECK: 11: 0 ;CHECK: 14: 0 @@ -97,7 +97,7 @@ ;CHECK: quick_sort:903:25 ;CHECK: 1: 24 ;CHECK: 2: 12 partition_pivot_last:7 partition_pivot_first:5 -;CHECK: 3: 11 quick_sort:12 +;CHECK: 3: 12 quick_sort:12 ;CHECK: 4: 12 quick_sort:12 ;CHECK: 6: 24 ;CHECK: 65507: 12 diff --git a/llvm/test/tools/llvm-profgen/noinline-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-noprobe.test index f5bbc29..19a5878 100644 --- a/llvm/test/tools/llvm-profgen/noinline-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-noprobe.test @@ -11,7 +11,7 @@ ;CHECK: 0: 0 ;CHECK: 1: 0 ;CHECK: 2: 19 -;CHECK: 3: 19 bar:21 +;CHECK: 3: 21 bar:21 ;CHECK: 4: 0 ;CHECK: 5: 0 ;CHECK: bar:926:21 diff --git 
a/llvm/test/tools/llvm-profgen/update-samples.test b/llvm/test/tools/llvm-profgen/update-samples.test new file mode 100644 index 0000000..d3ae425 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/update-samples.test @@ -0,0 +1,45 @@ +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t1 +; RUN: FileCheck %s --input-file %t1 --check-prefix=CALLSITE +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t2 --update-total-samples=1 +; RUN: FileCheck %s --input-file %t2 --check-prefix=TOTAL + + +;CALLSITE: foo:1241:0 +;CALLSITE: 0: 0 +;CALLSITE: 1: 0 +;CALLSITE: 2: 19 +;CALLSITE: 3: 21 bar:21 +;CALLSITE: 4: 0 +;CALLSITE: 5: 0 + +;TOTAL: foo:40:0 +;TOTAL: 0: 0 +;TOTAL: 1: 0 +;TOTAL: 2: 19 +;TOTAL: 3: 21 bar:21 +;TOTAL: 4: 0 +;TOTAL: 5: 0 + + +; original code: +; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out +#include <stdio.h> + +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 91) s = bar(i, s); else s += 30; + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index be5a581..65b2395 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -381,15 +381,26 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( } void ProfileGeneratorBase::updateTotalSamples() { - if (!UpdateTotalSamples) - return; - for (auto &Item : ProfileMap) { FunctionSamples &FunctionProfile = Item.second; FunctionProfile.updateTotalSamples(); } } +void ProfileGeneratorBase::updateCallsiteSamples() { + for (auto &Item : ProfileMap) { + FunctionSamples &FunctionProfile = Item.second; + FunctionProfile.updateCallsiteSamples(); + } +} + +void
ProfileGeneratorBase::updateFunctionSamples() { + updateCallsiteSamples(); + + if (UpdateTotalSamples) + updateTotalSamples(); +} + void ProfileGeneratorBase::collectProfiledFunctions() { std::unordered_set ProfiledFunctions; if (SampleCounters) { @@ -491,7 +502,7 @@ void ProfileGenerator::generateLineNumBasedProfile() { // Fill in boundary sample counts as well as call site samples for calls populateBoundarySamplesForAllFunctions(SC.BranchCounter); - updateTotalSamples(); + updateFunctionSamples(); } void ProfileGenerator::generateProbeBasedProfile() { @@ -505,7 +516,7 @@ void ProfileGenerator::generateProbeBasedProfile() { // Fill in boundary sample counts as well as call site samples for calls populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); - updateTotalSamples(); + updateFunctionSamples(); } void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( @@ -785,7 +796,7 @@ void CSProfileGenerator::generateLineNumBasedProfile() { // body sample. populateInferredFunctionSamples(); - updateTotalSamples(); + updateFunctionSamples(); } void CSProfileGenerator::populateBodySamplesForFunction( diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h index 410b08f..a6a9a10 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -100,8 +100,13 @@ protected: void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, uint64_t Count); + + void updateFunctionSamples(); + void updateTotalSamples(); + void updateCallsiteSamples(); + StringRef getCalleeNameForOffset(uint64_t TargetOffset); void computeSummaryAndThreshold(); -- 2.7.4