From: Hiroshi Yamauchi Date: Wed, 29 Jan 2020 17:36:31 +0000 (-0800) Subject: [PGO][PGSO] Handle MBFIWrapper X-Git-Tag: llvmorg-12-init~16170 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ac8da31a0f9895685e34e661d44075f46b559fbe;p=platform%2Fupstream%2Fllvm.git [PGO][PGSO] Handle MBFIWrapper Some code gen passes use MBFIWrapper to keep track of the frequency of new blocks. This was not taken into account and could lead to incorrect frequencies as MBFI silently returns zero frequency for unknown/new blocks. Add a variant for MBFIWrapper in the PGSO query interface. Depends on D73494. --- diff --git a/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/llvm/include/llvm/CodeGen/MachineSizeOpts.h index 3b02d08..07bbbad 100644 --- a/llvm/include/llvm/CodeGen/MachineSizeOpts.h +++ b/llvm/include/llvm/CodeGen/MachineSizeOpts.h @@ -21,6 +21,7 @@ class ProfileSummaryInfo; class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineFunction; +class MBFIWrapper; /// Returns true if machine function \p MF is suggested to be size-optimized /// based on the profile. @@ -33,6 +34,12 @@ bool shouldOptimizeForSize(const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType = PGSOQueryType::Other); +/// Returns true if machine basic block \p MBB is suggested to be size-optimized +/// based on the profile. +bool shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + MBFIWrapper *MBFIWrapper, + PGSOQueryType QueryType = PGSOQueryType::Other); } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h index e0623a3..5b2de1c 100644 --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include #include @@ -42,7 +43,7 @@ class TailDuplicator { const MachineModuleInfo *MMI; MachineRegisterInfo *MRI; MachineFunction *MF; - const MachineBlockFrequencyInfo *MBFI; + MBFIWrapper *MBFI; ProfileSummaryInfo *PSI; bool PreRegAlloc; bool LayoutMode; @@ -69,7 +70,7 @@ public: /// default implies using the command line value TailDupSize. void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, - const MachineBlockFrequencyInfo *MBFI, + MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize = 0); diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h index 1137c89..daead14 100644 --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -63,10 +63,9 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, F, PSI, *BFI); } -template -bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI, +template +bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI, BFIT *BFI, PGSOQueryType QueryType) { - assert(BB); if (!PSI || !BFI || !PSI->hasProfileSummary()) return false; if (ForcePGSO) @@ -81,11 +80,11 @@ bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI, if (PGSOColdCodeOnly || (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) { // Even if the working set size isn't large, size-optimize cold code. - return AdapterT::isColdBlock(BB, PSI, BFI); + return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI); } return !AdapterT::isHotBlockNthPercentile( PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf, - BB, PSI, BFI); + BBOrBlockFreq, PSI, BFI); } /// Returns true if function \p F is suggested to be size-optimized based on the diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index eb4e076..e355e16 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -655,8 +655,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineFunction *MF = MBB1->getParent(); bool OptForSize = MF->getFunction().hasOptSize() || - (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) && - llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI())); + (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) && + llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo)); return EffectiveTailLen >= 2 && OptForSize && (FullBlockTail1 || FullBlockTail2); } @@ -1511,7 +1511,7 @@ ReoptimizeBlock: bool OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI()); + llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo); if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 9dfa8d7..06e2f46 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -2082,8 +2082,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. bool OptForSize = F->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(L.getHeader(), PSI, - &MBFI->getMBFI()); + llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get()); if (OptForSize) return L.getHeader(); @@ -2841,7 +2840,7 @@ void MachineBlockPlacement::alignBlocks() { continue; // If the global profiles indicates so, don't align it. - if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && + if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) && !TLI->alignLoopsWithOptSize()) continue; @@ -3088,7 +3087,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (OptForSize) TailDupSize = 1; bool PreRegAlloc = false; - TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, + TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp index aff67f9..a181b54 100644 --- a/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -33,6 +34,13 @@ bool isColdBlock(const MachineBasicBlock *MBB, return Count && PSI->isColdCount(*Count); } +bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isColdCount(*Count); +} + /// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock. static bool isHotBlockNthPercentile(int PercentileCutoff, const MachineBasicBlock *MBB, @@ -42,6 +50,14 @@ static bool isHotBlockNthPercentile(int PercentileCutoff, return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); } +static bool isHotBlockNthPercentile(int PercentileCutoff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); +} + /// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for /// MachineFunction. bool isFunctionColdInCallGraph( @@ -95,6 +111,11 @@ struct MachineBasicBlockBFIAdapter { const MachineBlockFrequencyInfo *MBFI) { return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI); } + static bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI); + } static bool isHotBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, @@ -102,6 +123,13 @@ struct MachineBasicBlockBFIAdapter { return machine_size_opts_detail::isHotBlockNthPercentile( CutOff, MBB, PSI, MBFI); } + static bool isHotBlockNthPercentile(int CutOff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isHotBlockNthPercentile( + CutOff, BlockFreq, PSI, MBFI); + } }; } // end anonymous namespace @@ -117,6 +145,19 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType) { + assert(MBB); return shouldOptimizeForSizeImpl( MBB, PSI, MBFI, QueryType); } + +bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + MBFIWrapper *MBFIW, + PGSOQueryType QueryType) { + assert(MBB); + if (!PSI || !MBFIW) + return false; + BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB); + return shouldOptimizeForSizeImpl( + BlockFreq, PSI, &MBFIW->getMBFI(), QueryType); +} diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index 648bf48..20892a7 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -31,6 +31,7 @@ namespace { class TailDuplicateBase : public MachineFunctionPass { TailDuplicator Duplicator; + std::unique_ptr MBFIW; bool PreRegAlloc; public: TailDuplicateBase(char &PassID, bool PreRegAlloc) @@ -88,7 +89,10 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { auto *MBFI = (PSI && PSI->hasProfileSummary()) ? &getAnalysis().getBFI() : nullptr; - Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false); + if (MBFI) + MBFIW = std::make_unique(*MBFI); + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI, + /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 45eef7d..87db164 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -80,7 +80,7 @@ static cl::opt TailDupLimit("tail-dup-limit", cl::init(~0U), void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, - const MachineBlockFrequencyInfo *MBFIin, + MBFIWrapper *MBFIin, ProfileSummaryInfo *PSIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp index d2a4000..a959cc6 100644 --- a/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -84,6 +84,7 @@ bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, PGSOQueryType QueryType) { + assert(BB); return shouldOptimizeForSizeImpl(BB, PSI, BFI, QueryType); } diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll index 32ad04a..89c26df 100644 --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -846,6 +846,72 @@ cont4: ret void } +; This triggers a situation where a new block (bb4 is split) is created and then +; would be passed to the PGSO interface llvm::shouldOptimizeForSize(). +@GV = global i32 0 +define void @bfi_new_block_pgso(i32 %c) nounwind { +; CHECK-LABEL: bfi_new_block_pgso: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB14_4 +; CHECK-NEXT: # %bb.1: # %bb1 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: cmpl $16, %edi +; CHECK-NEXT: je .LBB14_6 +; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: cmpl $17, %edi +; CHECK-NEXT: je .LBB14_7 +; CHECK-NEXT: # %bb.3: # %bb4 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: jmp tail_call_me # TAILCALL +; CHECK-NEXT: .LBB14_4: # %bb5 +; CHECK-NEXT: cmpl $128, %edi +; CHECK-NEXT: jne .LBB14_8 +; CHECK-NEXT: # %bb.5: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB14_6: # %bb3 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: .LBB14_7: # %bb4 +; CHECK-NEXT: callq func +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .LBB14_8: # %bb6 +; CHECK-NEXT: jmp tail_call_me # TAILCALL +entry: + %0 = icmp eq i32 %c, 0 + br i1 %0, label %bb5, label %bb1 + +bb1: + switch i32 %c, label %bb4 [ + i32 16, label %bb3 + i32 17, label %bb2 + ] + +bb2: + call void @func() + br label %bb4 + +bb3: + store i32 0, i32* @GV + call void @func() + br label %bb4 + +bb4: + tail call void @tail_call_me() + br label %return + +bb5: + switch i32 %c, label %bb6 [ + i32 128, label %return + ] + +bb6: + tail call void @tail_call_me() + br label %return + +return: + ret void +} + !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}