From f58df39529060196b8ead812521b3a98ccd8abc3 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Thu, 3 Aug 2017 17:11:41 +0000 Subject: [PATCH] Do not want to use BFI to get profile count for sample pgo Summary: For SamplePGO, we already record the callsite count in the call instruction itself. So we do not want to use BFI to get profile count as it is less accurate. Reviewers: tejohnson, davidxl, eraman Reviewed By: eraman Subscribers: sanjoy, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D36025 llvm-svn: 309964 --- llvm/lib/Analysis/ProfileSummaryInfo.cpp | 20 ++++++- ...ion-summary-callgraph-sample-profile-summary.ll | 31 +++++++++++ ...ion-summary-callgraph-sample-profile-summary.ll | 61 +++++++++++----------- llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp | 10 ++-- 4 files changed, 86 insertions(+), 36 deletions(-) create mode 100644 llvm/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 12b86da..3c2e04d 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -39,6 +39,12 @@ static cl::opt ProfileSummaryCutoffCold( cl::desc("A count is cold if it is below the minimum count" " to reach this percentile of total counts.")); +static cl::opt AccurateSampleProfile( + "accurate-sample-profile", cl::Hidden, cl::init(false), + cl::desc("If the sample profile is accurate, we will mark all un-sampled " + "callsite as cold. Otherwise, treat un-sampled callsites as if " + "we have no profile.")); + // Find the minimum count to reach a desired percentile of counts. static uint64_t getMinCountForPercentile(SummaryEntryVector &DS, uint64_t Percentile) { @@ -78,10 +84,12 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, if (hasSampleProfile()) { // In sample PGO mode, check if there is a profile metadata on the // instruction. 
If it is present, determine hotness solely based on that, - // since the sampled entry count may not be accurate. + // since the sampled entry count may not be accurate. If there is no + // annotation on the instruction, return None. uint64_t TotalCount; if (Inst->extractProfTotalWeight(TotalCount)) return TotalCount; + return None; } if (BFI) return BFI->getBlockProfileCount(Inst->getParent()); @@ -199,7 +207,15 @@ bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI) { auto C = getProfileCount(CS.getInstruction(), BFI); - return C && isColdCount(*C); + if (C) + return isColdCount(*C); + + // In SamplePGO, if the caller has been sampled, and there is no profile + // annotated on the callsite, we consider the callsite as cold. + // If there is no profile for the caller, and we know the profile is + // accurate, we consider the callsite as cold. + return (hasSampleProfile() && + (CS.getCaller()->getEntryCount() || AccurateSampleProfile)); } INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", diff --git a/llvm/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll b/llvm/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll new file mode 100644 index 0000000..ca69314 --- /dev/null +++ b/llvm/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll @@ -0,0 +1,31 @@ +; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @hot1() #1 { + ret void +} +define void @hot2() #1 { + ret void +} +define void @hot3() #1 { + ret void +} +define void @cold1() #1 { + ret void +} +define void @cold2() #1 { + ret void +} +define void @cold3() #1 { + ret void +} +define void @none1() #1 { + ret void +} +define void @none2() #1 { + ret void +} +define void @none3() #1 { + 
ret void +} diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll index 09a6bbc..c940eba 100644 --- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll +++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll @@ -1,7 +1,7 @@ ; Test to check the callgraph in summary when there is PGO ; RUN: opt -module-summary %s -o %t.o ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s -; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o +; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll -o %t2.o ; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED @@ -16,24 +16,26 @@ ; "hot3" ; CHECK-NEXT: -; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 -; CHECK-NEXT: +; CHECK-NEXT: +; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123 +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED_NEXT: @@ -63,24 +69,19 @@ target triple = "x86_64-unknown-linux-gnu" ; This function have high profile count, so entry block is hot. 
define void @hot_function(i1 %a, i1 %a2) !prof !20 { entry: - call void @hot1() - br i1 %a, label %Cold, label %Hot, !prof !41 -Cold: ; 1/1000 goes here - call void @cold() - call void @hot2() - call void @hot4(), !prof !15 - call void @none1() - br label %exit -Hot: ; 999/1000 goes here - call void @hot2() - call void @hot3() - br i1 %a2, label %None1, label %None2, !prof !42 -None1: ; half goes here call void @none1() + call void @hot1(), !prof !15 + call void @cold1(), !prof !16 + br i1 %a, label %Cold, label %Hot, !prof !41 +Cold: ; 1/1000 goes here call void @none2() + call void @hot2(), !prof !15 + call void @cold2(), !prof !16 br label %exit -None2: ; half goes here +Hot: ; 999/1000 goes here call void @none3() + call void @hot3(), !prof !15 + call void @cold3(), !prof !16 br label %exit exit: ret void @@ -89,17 +90,14 @@ exit: declare void @hot1() #1 declare void @hot2() #1 declare void @hot3() #1 -declare void @hot4() #1 -declare void @cold() #1 +declare void @cold1() #1 +declare void @cold2() #1 +declare void @cold3() #1 declare void @none1() #1 declare void @none2() #1 declare void @none3() #1 - !41 = !{!"branch_weights", i32 1, i32 1000} -!42 = !{!"branch_weights", i32 1, i32 1} - - !llvm.module.flags = !{!1} !20 = !{!"function_entry_count", i64 110, i64 123} @@ -119,3 +117,4 @@ declare void @none3() #1 !13 = !{i32 999000, i64 100, i32 1} !14 = !{i32 999999, i64 1, i32 2} !15 = !{!"branch_weights", i32 100} +!16 = !{!"branch_weights", i32 1} diff --git a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp index 68a6d7b..76b0744 100644 --- a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp +++ b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp @@ -196,14 +196,18 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) { CallSite CS1(BB1->getFirstNonPHI()); auto *CI2 = BB2->getFirstNonPHI(); + // Manually attach branch weights metadata to the call instruction. 
+ SmallVector Weights; + Weights.push_back(1000); + MDBuilder MDB(M->getContext()); + CI2->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); CallSite CS2(CI2); - EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI)); - EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI)); + EXPECT_FALSE(PSI.isHotCallSite(CS1, &BFI)); + EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI)); // Test that CS2 is considered hot when it gets an MD_prof metadata with // weights that exceed the hot count threshold. - MDBuilder MDB(M->getContext()); CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400})); EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI)); } -- 2.7.4