From 2eb30fafa5f95d60353909c7c676431f2a29a745 Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Fri, 6 Dec 2019 12:17:32 -0800 Subject: [PATCH] Revert "[PGO][PGSO] Instrument the code gen / target passes." This reverts commit 9a0b5e14075a1f42a72eedb66fd4fde7985d37ac. This seems to break buildbots. --- llvm/include/llvm/CodeGen/AsmPrinter.h | 6 --- llvm/include/llvm/CodeGen/TailDuplicator.h | 6 --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 14 +----- llvm/lib/CodeGen/BranchFolding.cpp | 27 +++------- llvm/lib/CodeGen/BranchFolding.h | 4 -- llvm/lib/CodeGen/CodeGenPrepare.cpp | 58 ++++++++-------------- llvm/lib/CodeGen/ExpandMemCmp.cpp | 39 ++++----------- llvm/lib/CodeGen/IfConversion.cpp | 9 +--- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 26 ++-------- llvm/lib/CodeGen/MachineCombiner.cpp | 23 ++------- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 11 +--- llvm/lib/CodeGen/TailDuplication.cpp | 10 +--- llvm/lib/CodeGen/TailDuplicator.cpp | 17 ++----- llvm/lib/Target/X86/X86FixupBWInsts.cpp | 16 +----- llvm/lib/Target/X86/X86OptimizeLEAs.cpp | 18 +------ llvm/lib/Target/X86/X86PadShortFunction.cpp | 20 -------- llvm/test/CodeGen/AArch64/O0-pipeline.ll | 5 -- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 10 +--- llvm/test/CodeGen/ARM/O3-pipeline.ll | 7 --- llvm/test/CodeGen/X86/O0-pipeline.ll | 5 -- llvm/test/CodeGen/X86/O3-pipeline.ll | 13 +---- 21 files changed, 61 insertions(+), 283 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 49b1907..16298ff 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -48,7 +48,6 @@ class GlobalObject; class GlobalValue; class GlobalVariable; class MachineBasicBlock; -class MachineBlockFrequencyInfo; class MachineConstantPoolValue; class MachineDominatorTree; class MachineFunction; @@ -70,7 +69,6 @@ class MCSymbol; class MCTargetOptions; class MDNode; class Module; -class ProfileSummaryInfo; class raw_ostream; class RemarkStreamer; class StackMaps; @@ -110,10 +108,6 @@ public: /// Optimization remark emitter. MachineOptimizationRemarkEmitter *ORE; - MachineBlockFrequencyInfo *MBFI; - - ProfileSummaryInfo *PSI; - /// The symbol for the current function. This is recalculated at the beginning /// of each call to runOnMachineFunction(). MCSymbol *CurrentFnSym = nullptr; diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h index e0623a3..358798d 100644 --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -25,13 +25,11 @@ namespace llvm { class MachineBasicBlock; -class MachineBlockFrequencyInfo; class MachineBranchProbabilityInfo; class MachineFunction; class MachineInstr; class MachineModuleInfo; class MachineRegisterInfo; -class ProfileSummaryInfo; class TargetRegisterInfo; /// Utility class to perform tail duplication. @@ -42,8 +40,6 @@ class TailDuplicator { const MachineModuleInfo *MMI; MachineRegisterInfo *MRI; MachineFunction *MF; - const MachineBlockFrequencyInfo *MBFI; - ProfileSummaryInfo *PSI; bool PreRegAlloc; bool LayoutMode; unsigned TailDupSize; @@ -69,8 +65,6 @@ public: /// default implies using the command line value TailDupSize. void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, - const MachineBlockFrequencyInfo *MBFI, - ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize = 0); bool tailDuplicateBlocks(); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c0b18fc..e2ef415 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -31,16 +31,13 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -55,7 +52,6 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -252,8 +248,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); - AU.addRequired(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1690,10 +1684,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { } ORE = &getAnalysis().getORE(); - PSI = &getAnalysis().getPSI(); - MBFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; } namespace { @@ -2923,10 +2913,8 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, MCCodePaddingContext &Context) const { assert(MF != nullptr && "Machine function must be valid"); - bool OptForSize = MF->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); Context.IsPaddingActive = !MF->hasInlineAsm() && - !OptForSize && + !MF->getFunction().hasOptSize() && TM.getOptLevel() != CodeGenOpt::None; Context.IsBasicBlockReachableViaFallthrough = std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 2bf1b39..fbf87a5 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -39,7 +38,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -105,7 +103,6 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -132,8 +129,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::MBFIWrapper MBBFreqInfo( getAnalysis()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, - getAnalysis(), - &getAnalysis().getPSI()); + getAnalysis()); auto *MMIWP = getAnalysisIfAvailable(); return Folder.OptimizeFunction( MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), @@ -143,10 +139,9 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, - ProfileSummaryInfo *PSI, unsigned MinTailLength) : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), - MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) { + MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { if (MinCommonTailLength == 0) MinCommonTailLength = TailMergeSize; switch (FlagEnableTailMerge) { @@ -590,9 +585,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, DenseMap &EHScopeMembership, - bool AfterPlacement, - BranchFolder::MBFIWrapper &MBBFreqInfo, - ProfileSummaryInfo *PSI) { + bool AfterPlacement) { // It is never profitable to tail-merge blocks from two different EH scopes. if (!EHScopeMembership.empty()) { auto EHScope1 = EHScopeMembership.find(MBB1); @@ -689,11 +682,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); - bool OptForSize = - MF->getFunction().hasOptSize() || - (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) && - llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI())); - return EffectiveTailLen >= 2 && OptForSize && + return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() && (FullBlockTail1 || FullBlockTail2); } @@ -715,7 +704,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, CommonTailLen, TrialBBI1, TrialBBI2, SuccBB, PredBB, EHScopeMembership, - AfterBlockPlacement, MBBFreqInfo, PSI)) { + AfterBlockPlacement)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -1545,10 +1534,8 @@ ReoptimizeBlock: } } - bool OptForSize = - MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI()); - if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) { + if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && + MF.getFunction().hasOptSize()) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing // performance. Therefore, only consider this for optsize functions. diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index 7a4c68e..761ff9c 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -27,7 +27,6 @@ class MachineFunction; class MachineLoopInfo; class MachineModuleInfo; class MachineRegisterInfo; -class ProfileSummaryInfo; class raw_ostream; class TargetInstrInfo; class TargetRegisterInfo; @@ -40,7 +39,6 @@ class TargetRegisterInfo; bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, - ProfileSummaryInfo *PSI, // Min tail length to merge. Defaults to commandline // flag. Ignored for optsize. unsigned MinTailLength = 0); @@ -147,7 +145,6 @@ class TargetRegisterInfo; const BlockFrequency Freq) const; void view(const Twine &Name, bool isSimple = true); uint64_t getEntryFreq() const; - const MachineBlockFrequencyInfo &getMBFI() { return MBFI; } private: const MachineBlockFrequencyInfo &MBFI; @@ -157,7 +154,6 @@ class TargetRegisterInfo; private: MBFIWrapper &MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; - ProfileSummaryInfo *PSI; bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 3c86a83..a041808 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -90,7 +90,6 @@ #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" -#include "llvm/Transforms/Utils/SizeOpts.h" #include #include #include @@ -257,7 +256,6 @@ class TypePromotionTransaction; const LoopInfo *LI; std::unique_ptr BFI; std::unique_ptr BPI; - ProfileSummaryInfo *PSI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. Transforms that can invalidate this should update it. @@ -300,7 +298,7 @@ class TypePromotionTransaction; /// Keep track of SExt promoted. ValueToSExts ValToSExtendedUses; - /// True if the function has the OptSize attribute. + /// True if optimizing for size. bool OptSize; /// DataLayout for the Function being processed. @@ -437,8 +435,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); - PSI = &getAnalysis().getPSI(); OptSize = F.hasOptSize(); + + ProfileSummaryInfo *PSI = + &getAnalysis().getPSI(); if (ProfileGuidedSectionPrefix) { if (PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix(".hot"); @@ -457,9 +457,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // bypassSlowDivision may create new BBs, but we don't want to reapply the // optimization to those blocks. BasicBlock* Next = BB->getNextNode(); - // F.hasOptSize is already checked in the outer if statement. - if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) - EverMadeChange |= bypassSlowDivision(BB, BypassWidths); + EverMadeChange |= bypassSlowDivision(BB, BypassWidths); BB = Next; } } @@ -1940,8 +1938,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // cold block. This interacts with our handling for loads and stores to // ensure that we can fold all uses of a potential addressing computation // into their uses. TODO: generalize this to work over profiling data - bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get()); - if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) + if (!OptSize && CI->hasFnAttr(Attribute::Cold)) for (auto &Arg : CI->arg_operands()) { if (!Arg->getType()->isPointerTy()) continue; @@ -2878,24 +2875,16 @@ class AddressingModeMatcher { /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; - /// True if we are optimizing for size. - bool OptSize; - - ProfileSummaryInfo *PSI; - BlockFrequencyInfo *BFI; - AddressingModeMatcher( SmallVectorImpl &AMI, const TargetLowering &TLI, const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, - std::pair, int64_t> &LargeOffsetGEP, - bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) + std::pair, int64_t> &LargeOffsetGEP) : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), - PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP), - OptSize(OptSize), PSI(PSI), BFI(BFI) { + PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) { IgnoreProfitability = false; } @@ -2913,14 +2902,12 @@ public: const TargetLowering &TLI, const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, - std::pair, int64_t> &LargeOffsetGEP, - bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { + std::pair, int64_t> &LargeOffsetGEP) { ExtAddrMode Result; bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT, LargeOffsetGEP, - OptSize, PSI, BFI) + PromotedInsts, TPT, LargeOffsetGEP) .matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; @@ -4531,8 +4518,7 @@ static bool FindAllMemoryUses( Instruction *I, SmallVectorImpl> &MemoryUses, SmallPtrSetImpl &ConsideredInsts, const TargetLowering &TLI, - const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI, int SeenInsts = 0) { + const TargetRegisterInfo &TRI, int SeenInsts = 0) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -4541,6 +4527,8 @@ static bool FindAllMemoryUses( if (!MightBeFoldableInst(I)) return true; + const bool OptSize = I->getFunction()->hasOptSize(); + // Loop over all the uses, recursively processing them. for (Use &U : I->uses()) { // Conservatively return true if we're seeing a large number or a deep chain @@ -4581,9 +4569,7 @@ static bool FindAllMemoryUses( if (CallInst *CI = dyn_cast(UserI)) { // If this is a cold call, we can sink the addressing calculation into // the cold path. See optimizeCallInst - bool OptForSize = OptSize || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); - if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) + if (!OptSize && CI->hasFnAttr(Attribute::Cold)) continue; InlineAsm *IA = dyn_cast(CI->getCalledValue()); @@ -4595,8 +4581,8 @@ static bool FindAllMemoryUses( continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, - PSI, BFI, SeenInsts)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, + SeenInsts)) return true; } @@ -4684,8 +4670,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // the use is just a particularly nice way of sinking it. SmallVector, 16> MemoryUses; SmallPtrSet ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, - PSI, BFI)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -4721,7 +4706,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, TPT.getRestorationPoint(); AddressingModeMatcher Matcher( MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI); + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); Matcher.IgnoreProfitability = true; bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4827,8 +4812,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 0); ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, - BFI.get()); + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); GetElementPtrInst *GEP = LargeOffsetGEP.first; if (GEP && !NewGEPBases.count(GEP)) { @@ -6046,9 +6030,7 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // If branch conversion isn't desirable, exit early. - if (DisableSelectToBranch || - OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) || - !TLI) + if (DisableSelectToBranch || OptSize || !TLI) return false; // Find all consecutive select instructions that share the same condition. diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index a1adf4e..f49b882 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -13,8 +13,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/LazyBlockFrequencyInfo.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -23,7 +21,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" -#include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; @@ -724,8 +721,7 @@ Value *MemCmpExpansion::getMemCmpExpansion() { /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] /// ret i32 %phi.res static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, - const TargetLowering *TLI, const DataLayout *DL, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { + const TargetLowering *TLI, const DataLayout *DL) { NumMemCmpCalls++; // Early exit from expansion if -Oz. @@ -746,20 +742,18 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, // TTI call to check if target would like to expand memcmp. Also, get the // available load sizes. const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); - auto Options = TTI->enableMemCmpExpansion(OptForSize, + auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(), IsUsedForZeroCmp); if (!Options) return false; if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()) Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock; - if (OptForSize && + if (CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmpOptSize.getNumOccurrences()) Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize; - if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences()) + if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences()) Options.MaxNumLoads = MaxLoadsPerMemcmp; MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL); @@ -805,11 +799,7 @@ public: &getAnalysis().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis().getTTI(F); - auto *PSI = &getAnalysis().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; - auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI); + auto PA = runImpl(F, TLI, TTI, TL); return !PA.areAllPreserved(); } @@ -817,26 +807,22 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); FunctionPass::getAnalysisUsage(AU); } PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); + const TargetLowering* TL); // Returns true if a change was made. bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI); + const DataLayout& DL); }; bool ExpandMemCmpPass::runOnBlock( BasicBlock &BB, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { + const DataLayout& DL) { for (Instruction& I : BB) { CallInst *CI = dyn_cast(&I); if (!CI) { @@ -845,7 +831,7 @@ bool ExpandMemCmpPass::runOnBlock( LibFunc Func; if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && - expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) { + expandMemCmp(CI, TTI, TL, &DL)) { return true; } } @@ -855,12 +841,11 @@ bool ExpandMemCmpPass::runOnBlock( PreservedAnalyses ExpandMemCmpPass::runImpl( Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { + const TargetLowering* TL) { const DataLayout& DL = F.getParent()->getDataLayout(); bool MadeChanges = false; for (auto BBIt = F.begin(); BBIt != F.end();) { - if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) { MadeChanges = true; // If changes were made, restart the function from the beginning, since // the structure of the function was changed. @@ -879,8 +864,6 @@ INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 7d64828..14485a2 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -214,7 +213,6 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -436,7 +434,6 @@ char &llvm::IfConverterID = IfConverter::ID; INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { @@ -449,8 +446,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = ST.getRegisterInfo(); BranchFolder::MBFIWrapper MBFI(getAnalysis()); MBPI = &getAnalysis(); - ProfileSummaryInfo *PSI = - &getAnalysis().getPSI(); MRI = &MF.getRegInfo(); SchedModel.init(&ST); @@ -461,7 +456,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool BFChange = false; if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true, false, MBFI, *MBPI, PSI); + BranchFolder BF(true, false, MBFI, *MBPI); auto *MMIWP = getAnalysisIfAvailable(); BFChange = BF.OptimizeFunction( MF, TII, ST.getRegisterInfo(), @@ -603,7 +598,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false, false, MBFI, *MBPI, PSI); + BranchFolder BF(false, false, MBFI, *MBPI); auto *MMIWP = getAnalysisIfAvailable(); BF.OptimizeFunction( MF, TII, MF.getSubtarget().getRegisterInfo(), diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 30b98ec..c2d9d1b 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,7 +33,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -42,7 +41,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TailDuplicator.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -365,8 +363,6 @@ class MachineBlockPlacement : public MachineFunctionPass { /// A handle to the post dominator tree. MachinePostDominatorTree *MPDT; - ProfileSummaryInfo *PSI; - /// Duplicator used to duplicate tails during placement. /// /// Placement decisions can open up new tail duplication opportunities, but @@ -542,7 +538,6 @@ public: if (TailDupPlacement) AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -560,7 +555,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) @@ -2081,10 +2075,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // i.e. when the layout predecessor does not fallthrough to the loop header. // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. - bool OptForSize = F->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(L.getHeader(), PSI, - &MBFI->getMBFI()); - if (OptForSize) + if (F->getFunction().hasOptSize()) return L.getHeader(); MachineBasicBlock *OldTop = nullptr; @@ -2840,11 +2831,6 @@ void MachineBlockPlacement::alignBlocks() { if (Freq < (LoopHeaderFreq * ColdProb)) continue; - // If the global profiles indicates so, don't align it. - if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && - !TLI->alignLoopsWithOptSize()) - continue; - // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. MachineBasicBlock *LayoutPred = @@ -3052,7 +3038,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); MPDT = nullptr; - PSI = &getAnalysis().getPSI(); // Initialize PreferredLoopExit to nullptr here since it may never be set if // there are no MachineLoops. @@ -3083,13 +3068,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (allowTailDupPlacement()) { MPDT = &getAnalysis(); - bool OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI()); - if (OptForSize) + if (MF.getFunction().hasOptSize()) TailDupSize = 1; bool PreRegAlloc = false; - TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, - /* LayoutMode */ true, TailDupSize); + TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } @@ -3105,7 +3087,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (MF.size() > 3 && EnableTailMerge) { unsigned TailMergeSize = TailDupSize + 1; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI, PSI, TailMergeSize); + *MBPI, TailMergeSize); auto *MMIWP = getAnalysisIfAvailable(); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 73895bd..70c06c8 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -12,14 +12,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -70,8 +67,6 @@ class MachineCombiner : public MachineFunctionPass { MachineLoopInfo *MLI; // Current MachineLoopInfo MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; - MachineBlockFrequencyInfo *MBFI; - ProfileSummaryInfo *PSI; TargetSchedModel TSchedModel; @@ -88,7 +83,7 @@ public: StringRef getPassName() const override { return "Machine InstCombiner"; } private: - bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize); + bool doSubstitute(unsigned NewSize, unsigned OldSize); bool combineInstructions(MachineBasicBlock *); MachineInstr *getOperandDef(const MachineOperand &MO); unsigned getDepth(SmallVectorImpl &InsInstrs, @@ -137,8 +132,6 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -416,9 +409,8 @@ bool MachineCombiner::preservesResourceLen( /// \returns true when new instruction sequence should be generated /// independent if it lengthens critical path or not -bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize, - bool OptForSize) { - if (OptForSize && (NewSize < OldSize)) +bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { + if (OptSize && (NewSize < OldSize)) return true; if (!TSchedModel.hasInstrSchedModelOrItineraries()) return true; @@ -516,8 +508,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { SparseSet RegUnits; RegUnits.setUniverse(TRI->getNumRegUnits()); - bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI); - while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; SmallVector Patterns; @@ -594,8 +584,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. - if (SubstituteAlways || - doSubstitute(NewInstCount, OldInstCount, OptForSize)) { + if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) { insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, RegUnits, IncrementalUpdate); // Eagerly stop after the first pattern fires. @@ -650,10 +639,6 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); MLI = &getAnalysis(); Traces = &getAnalysis(); - PSI = &getAnalysis().getPSI(); - MBFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; MinInstr = nullptr; OptSize = MF.getFunction().hasOptSize(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f2ddb1f..b16d4af 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,10 +27,8 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" @@ -336,8 +334,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired(); - AU.addRequired(); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -440,17 +436,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - auto *PSI = &getAnalysis().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable(), PSI, BFI); + getAnalysisIfAvailable(), + nullptr, nullptr); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index 9428774..b4d0a30 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -12,8 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -40,8 +38,6 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -79,11 +75,7 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { return false; auto MBPI = &getAnalysis(); - auto *PSI = &getAnalysis().getPSI(); - auto *MBFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; - Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false); + Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index cd1278f..03c68a3 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -19,16 +19,13 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -80,8 +77,6 @@ static cl::opt TailDupLimit("tail-dup-limit", cl::init(~0U), void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, - const MachineBlockFrequencyInfo *MBFIin, - ProfileSummaryInfo *PSIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; TII = MF->getSubtarget().getInstrInfo(); @@ -89,8 +84,6 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, MRI = &MF->getRegInfo(); MMI = &MF->getMMI(); MBPI = MBPIin; - MBFI = MBFIin; - PSI = PSIin; TailDupSize = TailDupSizeIn; assert(MBPI != nullptr && "Machine Branch Probability Info required"); @@ -562,14 +555,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - bool OptForSize = MF->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI); - if (TailDupSize == 0) + if (TailDupSize == 0 && + TailDuplicateSize.getNumOccurrences() == 0 && + MF->getFunction().hasOptSize()) + MaxDuplicateCount = 1; + else if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; else MaxDuplicateCount = TailDupSize; - if (OptForSize) - MaxDuplicateCount = 1; // If the block to be duplicated ends in an unanalyzable fallthrough, don't // duplicate it. diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp index f8c4a2a..11dd690 100644 --- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -48,14 +48,11 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -116,8 +113,6 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); // Machine loop info is used to // guide some heuristics. - AU.addRequired(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -145,9 +140,6 @@ private: /// Register Liveness information after the current instruction. LivePhysRegs LiveRegs; - - ProfileSummaryInfo *PSI; - MachineBlockFrequencyInfo *MBFI; }; char FixupBWInstPass::ID = 0; } @@ -162,11 +154,8 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; TII = MF.getSubtarget().getInstrInfo(); + OptForSize = MF.getFunction().hasOptSize(); MLI = &getAnalysis(); - PSI = &getAnalysis().getPSI(); - MBFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; LiveRegs.init(TII->getRegisterInfo()); LLVM_DEBUG(dbgs() << "Start X86FixupBWInsts\n";); @@ -437,9 +426,6 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF, // We run after PEI, so we need to AddPristinesAndCSRs. LiveRegs.addLiveOuts(MBB); - OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); - for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) { MachineInstr *MI = &*I; diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index 0c791b6..8fa553f 100644 --- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -25,8 +25,6 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -34,7 +32,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -250,12 +247,6 @@ public: static char ID; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); - } - private: using MemOpMap = DenseMap>; @@ -690,11 +681,6 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); - auto *PSI = - &getAnalysis().getPSI(); - auto *MBFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; // Process all basic blocks. for (auto &MBB : MF) { @@ -713,9 +699,7 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { // Remove redundant address calculations. Do it only for -Os/-Oz since only // a code size gain is expected from this part of the pass. - bool OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); - if (OptForSize) + if (MF.getFunction().hasOptSize()) Changed |= removeRedundantAddrCalc(LEAs); } diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp index 4c6bd0c..af974c8 100644 --- a/llvm/lib/Target/X86/X86PadShortFunction.cpp +++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp @@ -17,11 +17,8 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Function.h" @@ -55,12 +52,6 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); - } - MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -114,12 +105,6 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { TSM.init(&MF.getSubtarget()); - auto *PSI = - &getAnalysis().getPSI(); - auto *MBFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; - // Search through basic blocks and mark the ones that have early returns ReturnBBs.clear(); VisitedBBs.clear(); @@ -133,11 +118,6 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = I->first; unsigned Cycles = I->second; - // Function::hasOptSize is already checked above. - bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI); - if (OptForSize) - continue; - if (Cycles < Threshold) { // BB ends in a return. Skip over any DBG_VALUE instructions // trailing the terminator. diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index 07170fa..0fe214d 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -11,7 +11,6 @@ ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Create Garbage Collector Module Metadata -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Pre-ISel Intrinsic Lowering @@ -46,10 +45,6 @@ ; CHECK-NEXT: Analysis for ComputingKnownBits ; CHECK-NEXT: InstructionSelect ; CHECK-NEXT: ResetMachineFunction -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: AArch64 Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 96f39f0..e1ab678 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Pre-ISel Intrinsic Lowering @@ -35,9 +35,6 @@ ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering @@ -81,13 +78,10 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: AArch64 Instruction Selection ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: AArch64 Local Dynamic TLS Access Clean-up ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs ; CHECK-NEXT: Slot index numbering @@ -99,7 +93,6 @@ ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Machine Trace Metrics ; CHECK-NEXT: AArch64 Conditional Compares -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine InstCombiner ; CHECK-NEXT: AArch64 Conditional Branch Tuning ; CHECK-NEXT: Machine Trace Metrics @@ -156,7 +149,6 @@ ; CHECK-NEXT: Shrink Wrapping analysis ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization ; CHECK-NEXT: Control Flow Optimizer -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Tail Duplication ; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Post-RA pseudo instruction expansion pass diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 4188ce7..3fd35bd 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -19,9 +19,6 @@ ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering @@ -70,11 +67,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: ARM Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs ; CHECK-NEXT: Slot index numbering @@ -130,7 +124,6 @@ ; CHECK-NEXT: Shrink Wrapping analysis ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization ; CHECK-NEXT: Control Flow Optimizer -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Tail Duplication ; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Post-RA pseudo instruction expansion pass diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 33ecad6..b6e1e45 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -14,7 +14,6 @@ ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Create Garbage Collector Module Metadata -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Pre-ISel Intrinsic Lowering @@ -38,10 +37,6 @@ ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: X86 DAG->DAG Instruction Selection ; CHECK-NEXT: X86 PIC Global Base Reg Initialization ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll index 575b704..389c21f 100644 --- a/llvm/test/CodeGen/X86/O3-pipeline.ll +++ b/llvm/test/CodeGen/X86/O3-pipeline.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Pre-ISel Intrinsic Lowering @@ -32,9 +32,6 @@ ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering @@ -67,15 +64,12 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: X86 DAG->DAG Instruction Selection ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Local Dynamic TLS Access Clean-up ; CHECK-NEXT: X86 PIC Global Base Reg Initialization ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: X86 Domain Reassignment Pass -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs ; CHECK-NEXT: Slot index numbering @@ -86,7 +80,6 @@ ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Machine Trace Metrics ; CHECK-NEXT: Early If-Conversion -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine InstCombiner ; CHECK-NEXT: X86 cmov Conversion ; CHECK-NEXT: MachineDominator Tree Construction @@ -101,7 +94,6 @@ ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: Live Range Shrink ; CHECK-NEXT: X86 Fixup SetCC -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: X86 LEA Optimize ; CHECK-NEXT: X86 Optimize Call Frame ; CHECK-NEXT: X86 Avoid Store Forwarding Block @@ -147,7 +139,6 @@ ; CHECK-NEXT: Shrink Wrapping analysis ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization ; CHECK-NEXT: Control Flow Optimizer -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Tail Duplication ; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Post-RA pseudo instruction expansion pass @@ -166,9 +157,7 @@ ; CHECK-NEXT: X86 vzeroupper inserter ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: X86 Byte/Word Instruction Fixup -; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: X86 Atom pad short functions ; CHECK-NEXT: X86 LEA Fixup ; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible -- 2.7.4