From e8c245dcd3b9f4237cdfb9dbcc6669d0b81e7936 Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Mon, 18 Oct 2021 17:44:45 -0700 Subject: [PATCH] [llvm-profgen] Skip duplication factor outside of body sample computation We incorrectly use duplication factor for total samples even though we already accumulate samples instead of taking MAX. This causes the profile to have bloated total samples for functions with unrolled or vectorized loops. This change fixes the issue for total samples, head samples and call target samples. Differential Revision: https://reviews.llvm.org/D112042 --- llvm/test/tools/llvm-profgen/inline-noprobe2.test | 4 ++-- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test index 6a523dd..147c509 100644 --- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -46,7 +46,7 @@ ;CHECK-NEXT: 1: 6 ;CHECK-NEXT: 2: 6 ;CHECK-NEXT: 3: 6 -;CHECK-NEXT: partition_pivot_last:647:7 +;CHECK-NEXT: partition_pivot_last:389:7 ;CHECK-NEXT: 1: 6 ;CHECK-NEXT: 2: 6 ;CHECK-NEXT: 3: 6 @@ -62,7 +62,7 @@ ;CHECK-NEXT: 6: 5 ;CHECK-NEXT: 7: 5 -;CHECK-NEXT: 5: swap:116 +;CHECK-NEXT: 5: swap:61 ;w/o duplication factor : 1: 9 ;w/o duplication factor : 2: 9 diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 5d1cde3..e98554a 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -274,6 +274,12 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( uint64_t Count) { // Use the maximum count of samples with same line location uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); + + // Use duplication factor to compensated for loop unroll/vectorization. 
+ // Note that this is only needed when we're taking MAX of the counts at + // the location instead of SUM. + Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); + ErrorOr R = FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); @@ -384,12 +390,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions( const SampleContextFrameVector &FrameVec = Binary->getFrameLocationStack(Offset); if (!FrameVec.empty()) { - uint64_t DC = Count * getDuplicationFactor( - FrameVec.back().Location.Discriminator); FunctionSamples &FunctionProfile = - getLeafProfileAndAddTotalSamples(FrameVec, DC); + getLeafProfileAndAddTotalSamples(FrameVec, Count); updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), - DC); + Count); } // Move to next IP within the range. IP.advance(); @@ -430,7 +434,6 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions( const SampleContextFrameVector &FrameVec = Binary->getFrameLocationStack(SourceOffset); if (!FrameVec.empty()) { - Count *= getDuplicationFactor(FrameVec.back().Location.Discriminator); FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count); FunctionProfile.addCalledTargetSamples( @@ -545,10 +548,8 @@ void CSProfileGenerator::populateBodySamplesForFunction( auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); if (LeafLoc.hasValue()) { // Recording body sample for this specific context - uint64_t DC = - Count * getDuplicationFactor(LeafLoc->Location.Discriminator); - updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, DC); - FunctionProfile.addTotalSamples(DC); + updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); + FunctionProfile.addTotalSamples(Count); } // Move to next IP within the range @@ -575,7 +576,6 @@ void CSProfileGenerator::populateBoundarySamplesForFunction( auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); if (!LeafLoc.hasValue()) continue; - Count *= 
getDuplicationFactor(LeafLoc->Location.Discriminator); FunctionProfile.addCalledTargetSamples( LeafLoc->Location.LineOffset, getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, -- 2.7.4