class MachineBasicBlock;
class MachineBlockFrequencyInfo;
class MachineFunction;
+class MBFIWrapper;
/// Returns true if machine function \p MF is suggested to be size-optimized
/// based on the profile.
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType = PGSOQueryType::Other);
+/// Returns true if machine basic block \p MBB is suggested to be size-optimized
+/// based on the profile. The block frequency of \p MBB is obtained through
+/// \p MBFIW instead of directly from a MachineBlockFrequencyInfo.
+///
+/// \param MBB       block being queried.
+/// \param PSI       profile summary information.
+/// \param MBFIW     wrapper used to look up the frequency of \p MBB.
+/// \param QueryType kind of call site issuing the query.
+bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
+                           ProfileSummaryInfo *PSI,
+                           MBFIWrapper *MBFIW,
+                           PGSOQueryType QueryType = PGSOQueryType::Other);
} // end namespace llvm
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <utility>
#include <vector>
const MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
MachineFunction *MF;
- const MachineBlockFrequencyInfo *MBFI;
+ MBFIWrapper *MBFI;
ProfileSummaryInfo *PSI;
bool PreRegAlloc;
bool LayoutMode;
/// default implies using the command line value TailDupSize.
void initMF(MachineFunction &MF, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPI,
- const MachineBlockFrequencyInfo *MBFI,
+ MBFIWrapper *MBFI,
ProfileSummaryInfo *PSI,
bool LayoutMode, unsigned TailDupSize = 0);
F, PSI, *BFI);
}
-template<typename AdapterT, typename BlockT, typename BFIT>
-bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
+template<typename AdapterT, typename BlockTOrBlockFreq, typename BFIT>
+bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI,
BFIT *BFI, PGSOQueryType QueryType) {
- assert(BB);
if (!PSI || !BFI || !PSI->hasProfileSummary())
return false;
if (ForcePGSO)
if (PGSOColdCodeOnly ||
(PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
// Even if the working set size isn't large, size-optimize cold code.
- return AdapterT::isColdBlock(BB, PSI, BFI);
+ return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
}
return !AdapterT::isHotBlockNthPercentile(
PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
- BB, PSI, BFI);
+ BBOrBlockFreq, PSI, BFI);
}
/// Returns true if function \p F is suggested to be size-optimized based on the
MachineFunction *MF = MBB1->getParent();
bool OptForSize =
MF->getFunction().hasOptSize() ||
- (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
- llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
+ (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
+ llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
return EffectiveTailLen >= 2 && OptForSize &&
(FullBlockTail1 || FullBlockTail2);
}
bool OptForSize =
MF.getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
+ llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
bool OptForSize = F->getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(L.getHeader(), PSI,
- &MBFI->getMBFI());
+ llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
if (OptForSize)
return L.getHeader();
continue;
// If the global profiles indicates so, don't align it.
- if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
!TLI->alignLoopsWithOptSize())
continue;
if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
- TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
+ TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
/* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
return Count && PSI->isColdCount(*Count);
}
+/// Cold-block query keyed on a block frequency rather than on a block.
+/// \p BlockFreq is converted into a profile count through \p MBFI; the result
+/// is true only when such a count is available and \p PSI classifies it as
+/// cold.
+bool isColdBlock(BlockFrequency BlockFreq,
+                 ProfileSummaryInfo *PSI,
+                 const MachineBlockFrequencyInfo *MBFI) {
+  const auto ProfileCount =
+      MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+  // No profile count for this frequency: not considered cold.
+  if (!ProfileCount)
+    return false;
+  return PSI->isColdCount(*ProfileCount);
+}
+
/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
static bool isHotBlockNthPercentile(int PercentileCutoff,
const MachineBasicBlock *MBB,
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
}
+/// Like the MachineBasicBlock overload of isHotBlockNthPercentile, but keyed
+/// on a block frequency. \p BlockFreq is converted into a profile count
+/// through \p MBFI; the result is true only when such a count is available and
+/// \p PSI reports it as hot at \p PercentileCutoff.
+static bool isHotBlockNthPercentile(int PercentileCutoff,
+                                    BlockFrequency BlockFreq,
+                                    ProfileSummaryInfo *PSI,
+                                    const MachineBlockFrequencyInfo *MBFI) {
+  const auto ProfileCount =
+      MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+  // No profile count for this frequency: not considered hot.
+  if (!ProfileCount)
+    return false;
+  return PSI->isHotCountNthPercentile(PercentileCutoff, *ProfileCount);
+}
+
/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
/// MachineFunction.
bool isFunctionColdInCallGraph(
const MachineBlockFrequencyInfo *MBFI) {
return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
}
+  /// Adapter entry point for the frequency-based cold-block query; forwards
+  /// its arguments unchanged to machine_size_opts_detail::isColdBlock.
+  static bool isColdBlock(BlockFrequency BlockFreq,
+                          ProfileSummaryInfo *PSI,
+                          const MachineBlockFrequencyInfo *MBFI) {
+    const bool IsCold =
+        machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
+    return IsCold;
+  }
static bool isHotBlockNthPercentile(int CutOff,
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
return machine_size_opts_detail::isHotBlockNthPercentile(
CutOff, MBB, PSI, MBFI);
}
+  /// Adapter entry point for the frequency-based hot-block query; forwards
+  /// its arguments unchanged to
+  /// machine_size_opts_detail::isHotBlockNthPercentile.
+  static bool isHotBlockNthPercentile(int CutOff,
+                                      BlockFrequency BlockFreq,
+                                      ProfileSummaryInfo *PSI,
+                                      const MachineBlockFrequencyInfo *MBFI) {
+    const bool IsHot = machine_size_opts_detail::isHotBlockNthPercentile(
+        CutOff, BlockFreq, PSI, MBFI);
+    return IsHot;
+  }
};
} // end anonymous namespace
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
+ assert(MBB);
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
MBB, PSI, MBFI, QueryType);
}
+
+/// Size-optimization query for \p MBB whose block frequency is read through
+/// \p MBFIW. Returns false when either \p PSI or \p MBFIW is null; otherwise
+/// the frequency reported by the wrapper, together with the wrapper's
+/// underlying MachineBlockFrequencyInfo, is handed to the shared
+/// shouldOptimizeForSizeImpl.
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+                                 ProfileSummaryInfo *PSI,
+                                 MBFIWrapper *MBFIW,
+                                 PGSOQueryType QueryType) {
+  assert(MBB);
+  // The wrapper is dereferenced below, so bail out before touching it.
+  if (PSI == nullptr || MBFIW == nullptr)
+    return false;
+  const BlockFrequency Freq = MBFIW->getBlockFreq(MBB);
+  auto *UnderlyingMBFI = &MBFIW->getMBFI();
+  return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
+      Freq, PSI, UnderlyingMBFI, QueryType);
+}
class TailDuplicateBase : public MachineFunctionPass {
TailDuplicator Duplicator;
+ std::unique_ptr<MBFIWrapper> MBFIW;
bool PreRegAlloc;
public:
TailDuplicateBase(char &PassID, bool PreRegAlloc)
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
- Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
+ if (MBFI)
+ MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
+ /*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
- const MachineBlockFrequencyInfo *MBFIin,
+ MBFIWrapper *MBFIin,
ProfileSummaryInfo *PSIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;
bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI,
PGSOQueryType QueryType) {
+ assert(BB); // A null block is rejected here, before the shared implementation is invoked.
return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
QueryType);
}
ret void
}
+; This test triggers a situation where a new block is created (bb4 is split) and
+; that new block is then passed to the PGSO interface
+; llvm::shouldOptimizeForSize().
+@GV = global i32 0
+define void @bfi_new_block_pgso(i32 %c) nounwind {
+; CHECK-LABEL: bfi_new_block_pgso:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: je .LBB14_4
+; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: cmpl $16, %edi
+; CHECK-NEXT: je .LBB14_6
+; CHECK-NEXT: # %bb.2: # %bb1
+; CHECK-NEXT: cmpl $17, %edi
+; CHECK-NEXT: je .LBB14_7
+; CHECK-NEXT: # %bb.3: # %bb4
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: jmp tail_call_me # TAILCALL
+; CHECK-NEXT: .LBB14_4: # %bb5
+; CHECK-NEXT: cmpl $128, %edi
+; CHECK-NEXT: jne .LBB14_8
+; CHECK-NEXT: # %bb.5: # %return
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB14_6: # %bb3
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: .LBB14_7: # %bb4
+; CHECK-NEXT: callq func
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB14_8: # %bb6
+; CHECK-NEXT: jmp tail_call_me # TAILCALL
+entry: ; branches to bb5 when %c is zero, otherwise to bb1
+ %0 = icmp eq i32 %c, 0
+ br i1 %0, label %bb5, label %bb1
+
+bb1: ; dispatches on %c: 16 goes to bb3, 17 goes to bb2, anything else to bb4
+ switch i32 %c, label %bb4 [
+ i32 16, label %bb3
+ i32 17, label %bb2
+ ]
+
+bb2: ; calls @func, then joins bb4
+ call void @func()
+ br label %bb4
+
+bb3: ; stores 0 to @GV, calls @func, then joins bb4
+ store i32 0, i32* @GV
+ call void @func()
+ br label %bb4
+
+bb4: ; join point of bb1, bb2 and bb3; tail-calls @tail_call_me
+ tail call void @tail_call_me()
+ br label %return
+
+bb5: ; reached from entry when %c is zero; 128 goes to return, anything else to bb6
+ switch i32 %c, label %bb6 [
+ i32 128, label %return
+ ]
+
+bb6: ; tail-calls @tail_call_me, then falls into return
+ tail call void @tail_call_me()
+ br label %return
+
+return: ; common exit for bb4, bb5 and bb6
+ ret void
+}
+
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}