From: wlei Date: Thu, 5 Aug 2021 03:20:58 +0000 (-0700) Subject: [llvm-profgen] Fix bug of loop scope mismatch X-Git-Tag: upstream/15.0.7~34533 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a8a38ef3d99ce2b180f9c5ff968e5b930a99b10b;p=platform%2Fupstream%2Fllvm.git [llvm-profgen] Fix bug of loop scope mismatch A performance issue was found in profile generation: the loop at line 525 turned out to be the bottleneck. The loop that assigns zero counts to the remaining probes was mistakenly nested inside the per-probe loop; moving it outside of that loop's scope fixes the issue. The run time improves from 30+ minutes to ~30 seconds. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D107529 --- diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 57853f23..83d9f3c 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -8,6 +8,7 @@ #include "ProfileGenerator.h" #include "llvm/ProfileData/ProfileCommon.h" +#include <unordered_set> static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), cl::Required, @@ -520,7 +521,8 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( // Extract the top frame probes by looking up each address among the range in // the Address2ProbeMap extractProbesFromRange(RangeCounter, ProbeCounter, Binary); - std::unordered_map<MCDecodedPseudoProbeInlineTree *, FunctionSamples *> + std::unordered_map<MCDecodedPseudoProbeInlineTree *, + std::unordered_set<FunctionSamples *>> FrameSamples; for (auto PI : ProbeCounter) { const MCDecodedPseudoProbe *Probe = PI.first; @@ -530,7 +532,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( // Record the current frame and FunctionProfile whenever samples are // collected for non-danglie probes. This is for reporting all of the // zero count probes of the frame later. 
- FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile; + FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); FunctionProfile.addTotalSamples(Count); if (Probe->isEntry()) { @@ -559,12 +561,13 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( FunctionProfile.getContext().getNameWithoutContext(), Count); } } + } - // Assign zero count for remaining probes without sample hits to - // differentiate from probes optimized away, of which the counts are unknown - // and will be inferred by the compiler. - for (auto &I : FrameSamples) { - auto *FunctionProfile = I.second; + // Assign zero count for remaining probes without sample hits to + // differentiate from probes optimized away, of which the counts are unknown + // and will be inferred by the compiler. + for (auto &I : FrameSamples) { + for (auto *FunctionProfile : I.second) { for (auto *Probe : I.first->getProbes()) { FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); }