From: Daniel Jasper Date: Tue, 11 Oct 2016 07:36:11 +0000 (+0000) Subject: Revert "Codegen: Tail-duplicate during placement." X-Git-Tag: llvmorg-4.0.0-rc1~7534 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0c42dc4784c77adf363995fe3962cbd2da2ac4d0;p=platform%2Fupstream%2Fllvm.git Revert "Codegen: Tail-duplicate during placement." This reverts commit r283842. test/CodeGen/X86/tail-dup-repeat.ll causes an llc crash with our internal testing. I'll share a link with you. llvm-svn: 283857 --- diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index 833a220..47ab87e 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -186,13 +186,8 @@ BlockT *LoopBase::getLoopLatch() const { template void LoopBase:: addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase &LIB) { -#ifndef NDEBUG - if (!Blocks.empty()) { - auto SameHeader = LIB[getHeader()]; - assert(contains(SameHeader) && getHeader() == SameHeader->getHeader() - && "Incorrect LI specified for this loop!"); - } -#endif + assert((Blocks.empty() || LIB[getHeader()] == this) && + "Incorrect LI specified for this loop!"); assert(NewBB && "Cannot add a null basic block to the loop!"); assert(!LIB[NewBB] && "BasicBlock already in the loop!"); diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h index b667245..6037a12 100644 --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_TAILDUPLICATOR_H #define LLVM_CODEGEN_TAILDUPLICATOR_H -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -38,7 +37,6 @@ class TailDuplicator { MachineRegisterInfo *MRI; MachineFunction *MF; bool PreRegAlloc; - bool LayoutMode; unsigned TailDupSize; // A list of virtual registers for which to 
update SSA form. @@ -52,16 +50,10 @@ class TailDuplicator { public: /// Prepare to run on a specific machine function. - /// @param MF - Function that will be processed - /// @param MBPI - Branch Probability Info. Used to propagate correct - /// probabilities when modifying the CFG. - /// @param LayoutMode - When true, don't use the existing layout to make - /// decisions. - /// @param TailDupSize - Maxmimum size of blocks to tail-duplicate. Zero - /// default implies using the command line value TailDupSize. + /// @param TailDupSize - Maxmimum size of blocks to tail-duplicate. void initMF(MachineFunction &MF, const MachineBranchProbabilityInfo *MBPI, - bool LayoutMode, unsigned TailDupSize = 0); + unsigned TailDupSize = 0); bool tailDuplicateBlocks(); static bool isSimpleBB(MachineBasicBlock *TailBB); bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB); @@ -71,13 +63,9 @@ public: /// up. /// If \p DuplicatePreds is not null, it will be updated to contain the list /// of predecessors that received a copy of \p MBB. - /// If \p RemovalCallback is non-null. It will be called before MBB is - /// deleted. 
bool tailDuplicateAndUpdate( bool IsSimple, MachineBasicBlock *MBB, - MachineBasicBlock *ForcedLayoutPred, - SmallVectorImpl *DuplicatedPreds = nullptr, - llvm::function_ref *RemovalCallback = nullptr); + SmallVectorImpl *DuplicatedPreds = nullptr); private: typedef TargetInstrInfo::RegSubRegPair RegSubRegPair; @@ -101,18 +89,14 @@ private: SmallVectorImpl &TDBBs, const DenseSet &RegsUsedByPhi, SmallVectorImpl &Copies); - bool tailDuplicate(bool IsSimple, - MachineBasicBlock *TailBB, - MachineBasicBlock *ForcedLayoutPred, + bool tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, SmallVectorImpl &TDBBs, SmallVectorImpl &Copies); void appendCopies(MachineBasicBlock *MBB, SmallVectorImpl> &CopyInfos, SmallVectorImpl &Copies); - void removeDeadBlock( - MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback = nullptr); + void removeDeadBlock(MachineBasicBlock *MBB); }; } // End llvm namespace diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 6adab32..90a576a 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -40,7 +40,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -122,12 +121,6 @@ static cl::opt MisfetchCost( static cl::opt JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); -static cl::opt -TailDupPlacement("tail-dup-placement", - cl::desc("Perform tail duplication during placement. 
" - "Creates more fallthrough opportunites in " - "outline branches."), - cl::init(true), cl::Hidden); static cl::opt BranchFoldPlacement("branch-fold-placement", @@ -135,14 +128,6 @@ BranchFoldPlacement("branch-fold-placement", "Reduces code size."), cl::init(true), cl::Hidden); -// Heuristic for tail duplication. -static cl::opt TailDuplicatePlacementThreshold( - "tail-dup-placement-threshold", - cl::desc("Instruction cutoff for tail duplication during layout. " - "Tail merging during layout is forced to have a threshold " - "that won't conflict."), cl::init(2), - cl::Hidden); - extern cl::opt StaticLikelyProb; extern cl::opt ProfileLikelyProb; @@ -200,16 +185,6 @@ public: /// \brief End of blocks within the chain. iterator end() { return Blocks.end(); } - bool remove(MachineBasicBlock* BB) { - for(iterator i = begin(); i != end(); ++i) { - if (*i == BB) { - Blocks.erase(i); - return true; - } - } - return false; - } - /// \brief Merge a block chain into this one. /// /// This routine merges a block chain into this one. It takes care of forming @@ -291,13 +266,6 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the post dominator tree. MachineDominatorTree *MDT; - /// \brief Duplicator used to duplicate tails during placement. - /// - /// Placement decisions can open up new tail duplication opportunities, but - /// since tail duplication affects placement decisions of later blocks, it - /// must be done inline. - TailDuplicator TailDup; - /// \brief A set of blocks that are unavoidably execute, i.e. they dominate /// all terminators of the MachineFunction. SmallPtrSet UnavoidableBlocks; @@ -319,18 +287,8 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap BlockToChain; - /// Decrease the UnscheduledPredecessors count for all blocks in chain, and - /// if the count goes to 0, add them to the appropriate work list. 
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter = nullptr); - - /// Decrease the UnscheduledPredecessors count for a single block, and - /// if the count goes to 0, add them to the appropriate work list. - void markBlockSuccessors( - BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB, - const BlockFilterSet *BlockFilter = nullptr); - - BranchProbability collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter, @@ -340,16 +298,6 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter, BranchProbability SuccProb, BranchProbability HotProb); - bool repeatedlyTailDuplicateBlock( - MachineBasicBlock *BB, MachineBasicBlock *&LPred, - MachineBasicBlock *LoopHeaderBB, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt); - bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred, - const BlockChain &Chain, - BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt, - bool &DuplicatedToPred); bool hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain, BranchProbability SuccProb, @@ -375,7 +323,7 @@ class MachineBlockPlacement : public MachineFunctionPass { SmallPtrSetImpl &UpdatedPreds, const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, - BlockFilterSet *BlockFilter = nullptr); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineLoop &L, @@ -440,49 +388,37 @@ static std::string getBlockName(MachineBasicBlock *BB) { /// When a chain is being merged into the "placed" chain, this routine will /// quickly walk the successors of each block in the chain and mark them as /// having one fewer active predecessor. 
It also adds any successors of this -/// chain which reach the zero-predecessor state to the appropriate worklist. +/// chain which reach the zero-predecessor state to the worklist passed in. void MachineBlockPlacement::markChainSuccessors( BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. for (MachineBasicBlock *MBB : Chain) { - markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter); - } -} - -/// \brief Mark a single block's successors as having one fewer preds. -/// -/// Under normal circumstances, this is only called by markChainSuccessors, -/// but if a block that was to be placed is completely tail-duplicated away, -/// and was duplicated into the chain end, we need to redo markBlockSuccessors -/// for just that block. -void MachineBlockPlacement::markBlockSuccessors( - BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB, - const BlockFilterSet *BlockFilter) { - // Add any successors for which this is the only un-placed in-loop - // predecessor to the worklist as a viable candidate for CFG-neutral - // placement. No subsequent placement of this block will violate the CFG - // shape, so we get to use heuristics to choose a favorable placement. - for (MachineBasicBlock *Succ : MBB->successors()) { - if (BlockFilter && !BlockFilter->count(Succ)) - continue; - BlockChain &SuccChain = *BlockToChain[Succ]; - // Disregard edges within a fixed chain, or edges to the loop header. - if (&Chain == &SuccChain || Succ == LoopHeaderBB) - continue; + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. 
+ for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) + continue; + BlockChain &SuccChain = *BlockToChain[Succ]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || Succ == LoopHeaderBB) + continue; - // This is a cross-chain edge that is within the loop, so decrement the - // loop predecessor count of the destination chain. - if (SuccChain.UnscheduledPredecessors == 0 || - --SuccChain.UnscheduledPredecessors > 0) - continue; + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.UnscheduledPredecessors == 0 || + --SuccChain.UnscheduledPredecessors > 0) + continue; - auto *NewBB = *SuccChain.begin(); - if (NewBB->isEHPad()) - EHPadWorkList.push_back(NewBB); - else - BlockWorkList.push_back(NewBB); + auto *MBB = *SuccChain.begin(); + if (MBB->isEHPad()) + EHPadWorkList.push_back(MBB); + else + BlockWorkList.push_back(MBB); + } } } @@ -966,7 +902,7 @@ void MachineBlockPlacement::fillWorkLists( void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, - BlockFilterSet *BlockFilter) { + const BlockFilterSet *BlockFilter) { assert(BB && "BB must not be null.\n"); assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n"); MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); @@ -1001,17 +937,6 @@ void MachineBlockPlacement::buildChain( "layout successor until the CFG reduces\n"); } - // Placement may have changed tail duplication opportunities. - // Check for that now. - if (TailDupPlacement && BestSucc) { - // If the chosen successor was duplicated into all its predecessors, - // don't bother laying it out, just go round the loop again with BB as - // the chain end. 
- if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, - BlockFilter, PrevUnplacedBlockIt)) - continue; - } - // Place this block, updating the datastructures to reflect its placement. BlockChain &SuccChain = *BlockToChain[BestSucc]; // Zero out UnscheduledPredecessors for the successor we're about to merge in case @@ -1793,175 +1718,6 @@ void MachineBlockPlacement::alignBlocks() { } } -/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if -/// it was duplicated into its chain predecessor and removed. -/// \p BB - Basic block that may be duplicated. -/// -/// \p LPred - Chosen layout predecessor of \p BB. -/// Updated to be the chain end if LPred is removed. -/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong. -/// \p BlockFilter - Set of blocks that belong to the loop being laid out. -/// Used to identify which blocks to update predecessor -/// counts. -/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was -/// chosen in the given order due to unnatural CFG -/// only needed if \p BB is removed and -/// \p PrevUnplacedBlockIt pointed to \p BB. -/// @return true if \p BB was removed. -bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( - MachineBasicBlock *BB, MachineBasicBlock *&LPred, - MachineBasicBlock *LoopHeaderBB, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt) { - bool Removed, DuplicatedToLPred; - bool DuplicatedToOriginalLPred; - Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter, - PrevUnplacedBlockIt, - DuplicatedToLPred); - if (!Removed) - return false; - DuplicatedToOriginalLPred = DuplicatedToLPred; - // Iteratively try to duplicate again. It can happen that a block that is - // duplicated into is still small enough to be duplicated again. - // No need to call markBlockSuccessors in this case, as the blocks being - // duplicated from here on are already scheduled. 
- // Note that DuplicatedToLPred always implies Removed. - while (DuplicatedToLPred) { - assert (Removed && "Block must have been removed to be duplicated into its " - "layout predecessor."); - MachineBasicBlock *DupBB, *DupPred; - // The removal callback causes Chain.end() to be updated when a block is - // removed. On the first pass through the loop, the chain end should be the - // same as it was on function entry. On subsequent passes, because we are - // duplicating the block at the end of the chain, if it is removed the - // chain will have shrunk by one block. - BlockChain::iterator ChainEnd = Chain.end(); - DupBB = *(--ChainEnd); - // Now try to duplicate again. - if (ChainEnd == Chain.begin()) - break; - DupPred = *std::prev(ChainEnd); - Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter, - PrevUnplacedBlockIt, - DuplicatedToLPred); - } - // If BB was duplicated into LPred, it is now scheduled. But because it was - // removed, markChainSuccessors won't be called for its chain. Instead we - // call markBlockSuccessors for LPred to achieve the same effect. This must go - // at the end because repeating the tail duplication can increase the number - // of unscheduled predecessors. - if (DuplicatedToOriginalLPred) - markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter); - - LPred = *std::prev(Chain.end()); - return true; -} - -/// Tail duplicate \p BB into (some) predecessors if profitable. -/// \p BB - Basic block that may be duplicated -/// \p LPred - Chosen layout predecessor of \p BB -/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong. -/// \p BlockFilter - Set of blocks that belong to the loop being laid out. -/// Used to identify which blocks to update predecessor -/// counts. -/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was -/// chosen in the given order due to unnatural CFG -/// only needed if \p BB is removed and -/// \p PrevUnplacedBlockIt pointed to \p BB. 
-/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will -/// only be true if the block was removed. -/// \return - True if the block was duplicated into all preds and removed. -bool MachineBlockPlacement::maybeTailDuplicateBlock( - MachineBasicBlock *BB, MachineBasicBlock *LPred, - const BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt, - bool &DuplicatedToLPred) { - - DuplicatedToLPred = false; - DEBUG(dbgs() << "Redoing tail duplication for Succ#" - << BB->getNumber() << "\n"); - bool IsSimple = TailDup.isSimpleBB(BB); - // Blocks with single successors don't create additional fallthrough - // opportunities. Don't duplicate them. TODO: When conditional exits are - // analyzable, allow them to be duplicated. - if (!IsSimple && BB->succ_size() == 1) - return false; - if (!TailDup.shouldTailDuplicate(IsSimple, *BB)) - return false; - // This has to be a callback because none of it can be done after - // BB is deleted. - bool Removed = false; - auto RemovalCallback = - [&](MachineBasicBlock *RemBB) { - // Signal to outer function - Removed = true; - - // Conservative default. - bool InWorkList = true; - // Remove from the Chain and Chain Map - if (BlockToChain.count(RemBB)) { - BlockChain *Chain = BlockToChain[RemBB]; - InWorkList = Chain->UnscheduledPredecessors == 0; - Chain->remove(RemBB); - BlockToChain.erase(RemBB); - } - - // Handle the unplaced block iterator - if (&(*PrevUnplacedBlockIt) == RemBB) { - PrevUnplacedBlockIt++; - } - - // Handle the Work Lists - if (InWorkList) { - SmallVectorImpl &RemoveList = BlockWorkList; - if (RemBB->isEHPad()) - RemoveList = EHPadWorkList; - RemoveList.erase( - remove_if(RemoveList, - [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}), - RemoveList.end()); - } - - // Handle the filter set - if (BlockFilter) { - BlockFilter->erase(RemBB); - } - - // Remove the block from loop info. 
- MLI->removeBlock(RemBB); - - // TailDuplicator handles removing it from loops. - DEBUG(dbgs() << "TailDuplicator deleted block: " - << getBlockName(RemBB) << "\n"); - }; - auto RemovalCallbackRef = - llvm::function_ref(RemovalCallback); - - SmallVector DuplicatedPreds; - TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, - &DuplicatedPreds, &RemovalCallbackRef); - - // Update UnscheduledPredecessors to reflect tail-duplication. - DuplicatedToLPred = false; - for (MachineBasicBlock *Pred : DuplicatedPreds) { - // We're only looking for unscheduled predecessors that match the filter. - BlockChain* PredChain = BlockToChain[Pred]; - if (Pred == LPred) - DuplicatedToLPred = true; - if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) - || PredChain == &Chain) - continue; - for (MachineBasicBlock *NewSucc : Pred->successors()) { - if (BlockFilter && !BlockFilter->count(NewSucc)) - continue; - BlockChain *NewChain = BlockToChain[NewSucc]; - if (NewChain != &Chain && NewChain != PredChain) - NewChain->UnscheduledPredecessors++; - } - } - return Removed; -} - bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -1978,13 +1734,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); MDT = &getAnalysis(); - if (TailDupPlacement) { - unsigned TailDupSize = TailDuplicatePlacementThreshold; - if (MF.getFunction()->optForSize()) - TailDupSize = 1; - TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize); - } - assert(BlockToChain.empty()); buildCFGChains(); @@ -1998,7 +1747,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFoldPlacement; // No tail merging opportunities if the block number is less than four. 
if (MF.size() > 3 && EnableTailMerge) { - unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1; + // Default to the standard tail-merge-size option. + unsigned TailMergeSize = 0; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, TailMergeSize); @@ -2007,8 +1757,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { /*AfterBlockPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); - // Must redo the dominator tree if blocks were changed. - MDT->runOnMachineFunction(MF); ChainAllocator.DestroyAll(); buildCFGChains(); } diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index e2377d8..6e8ee9e 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -49,7 +49,7 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { auto MBPI = &getAnalysis(); - Duplicator.initMF(MF, MBPI, /* LayoutMode */ false); + Duplicator.initMF(MF, MBPI); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 06aa5e1..26b9b07 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" @@ -65,7 +64,7 @@ static cl::opt TailDupLimit("tail-dup-limit", cl::init(~0U), void TailDuplicator::initMF(MachineFunction &MFin, const MachineBranchProbabilityInfo *MBPIin, - bool LayoutModeIn, unsigned TailDupSizeIn) { + unsigned TailDupSizeIn) { MF = &MFin; TII = MF->getSubtarget().getInstrInfo(); TRI = MF->getSubtarget().getRegisterInfo(); @@ -76,7 +75,6 @@ void 
TailDuplicator::initMF(MachineFunction &MFin, assert(MBPI != nullptr && "Machine Branch Probability Info required"); - LayoutMode = LayoutModeIn; PreRegAlloc = MRI->isSSA(); } @@ -129,23 +127,18 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { /// Tail duplicate the block and cleanup. /// \p IsSimple - return value of isSimpleBB /// \p MBB - block to be duplicated -/// \p ForcedLayoutPred - If non-null, treat this block as the layout -/// predecessor, instead of using the ordering in MF /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of /// all Preds that received a copy of \p MBB. -/// \p RemovalCallback - if non-null, called just before MBB is deleted. bool TailDuplicator::tailDuplicateAndUpdate( bool IsSimple, MachineBasicBlock *MBB, - MachineBasicBlock *ForcedLayoutPred, - SmallVectorImpl *DuplicatedPreds, - llvm::function_ref *RemovalCallback) { + SmallVectorImpl *DuplicatedPreds) { // Save the successors list. SmallSetVector Succs(MBB->succ_begin(), MBB->succ_end()); SmallVector TDBBs; SmallVector Copies; - if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies)) + if (!tailDuplicate(IsSimple, MBB, TDBBs, Copies)) return false; ++NumTails; @@ -163,7 +156,7 @@ bool TailDuplicator::tailDuplicateAndUpdate( // If it is dead, remove it. if (isDead) { NumTailDupRemoved += MBB->size(); - removeDeadBlock(MBB, RemovalCallback); + removeDeadBlock(MBB); ++NumDeadBlocks; } @@ -262,7 +255,7 @@ bool TailDuplicator::tailDuplicateBlocks() { if (!shouldTailDuplicate(IsSimple, *MBB)) continue; - MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); + MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB); } if (PreRegAlloc && TailDupVerify) @@ -521,10 +514,8 @@ void TailDuplicator::updateSuccessorsPHIs( /// Determine if it is profitable to duplicate this block. 
bool TailDuplicator::shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB) { - // When doing tail-duplication during layout, the block ordering is in flux, - // so canFallThrough returns a result based on incorrect information and - // should just be ignored. - if (!LayoutMode && TailBB.canFallThrough()) + // Only duplicate blocks that end with unconditional branches. + if (TailBB.canFallThrough()) return false; // Don't try to tail-duplicate single-block loops. @@ -744,7 +735,7 @@ bool TailDuplicator::duplicateSimpleBB( bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB) { - // EH edges are ignored by analyzeBranch. + // EH edges are ignored by AnalyzeBranch. if (PredBB->succ_size() > 1) return false; @@ -759,16 +750,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, /// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. -/// \p IsSimple result of isSimpleBB -/// \p TailBB Block to be duplicated. -/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor -/// instead of the previous block in MF's order. -/// \p TDBBs A vector to keep track of all blocks tail-duplicated -/// into. -/// \p Copies A vector of copy instructions inserted. Used later to -/// walk all the inserted copies and remove redundant ones. bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, - MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl &TDBBs, SmallVectorImpl &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); @@ -793,12 +775,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, continue; // Don't duplicate into a fall-through predecessor (at least for now). 
- bool IsLayoutSuccessor = false; - if (ForcedLayoutPred) - IsLayoutSuccessor = (ForcedLayoutPred == PredBB); - else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) - IsLayoutSuccessor = true; - if (IsLayoutSuccessor) + if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB @@ -851,27 +828,19 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock *PrevBB = ForcedLayoutPred; - if (!PrevBB) - PrevBB = &*std::prev(TailBB->getIterator()); + MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by - // analyzeBranch. + // AnalyzeBranch. if (PrevBB->succ_size() == 1 && // Layout preds are not always CFG preds. Check. *PrevBB->succ_begin() == TailBB && !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && - PriorCond.empty() && - (!PriorTBB || PriorTBB == TailBB) && - TailBB->pred_size() == 1 && + PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); - // There may be a branch to the layout successor. This is unlikely but it - // happens. The correct thing to do is to remove the branch before - // duplicating the instructions in all cases. 
- TII->removeBranch(*PrevBB); if (PreRegAlloc) { DenseMap LocalVRMap; SmallVector, 4> CopyInfos; @@ -895,7 +864,6 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, } appendCopies(PrevBB, CopyInfos, Copies); } else { - TII->removeBranch(*PrevBB); // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } @@ -968,15 +936,10 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB, /// Remove the specified dead machine basic block from the function, updating /// the CFG. -void TailDuplicator::removeDeadBlock( - MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback) { +void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); - if (RemovalCallback) - (*RemovalCallback)(MBB); - // Remove all successors. while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end() - 1); diff --git a/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll b/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll index 5dd8cb2..92ce2a0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll +++ b/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll @@ -12,6 +12,7 @@ bb1: %tmp2 = load i16, i16* %ptr, align 2 br label %bb2 bb2: +; CHECK: %bb2 ; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff ; CHECK: cmp [[REG]], #23 %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ] diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll index e9fa680..032199e 100644 --- a/llvm/test/CodeGen/AArch64/machine_cse.ll +++ b/llvm/test/CodeGen/AArch64/machine_cse.ll @@ -1,8 +1,4 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s -; -tail-dup-placement causes tail duplication during layout. 
This breaks the -; assumptions of the test case as written (specifically, it creates an -; additional cmp instruction, creating a false positive), so we pass -; -tail-dup-placement=0 to restore the original behavior +; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s ; marked as external to prevent possible optimizations @a = external global i32 diff --git a/llvm/test/CodeGen/AArch64/tail-dup-repeat-worklist.ll b/llvm/test/CodeGen/AArch64/tail-dup-repeat-worklist.ll deleted file mode 100644 index c2997c5..0000000 --- a/llvm/test/CodeGen/AArch64/tail-dup-repeat-worklist.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: llc -O3 -o - -verify-machineinstrs %s | FileCheck %s -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-linux-gnu" - -%struct.s1 = type { %struct.s3*, %struct.s1* } -%struct.s2 = type opaque -%struct.s3 = type { i32 } - -; Function Attrs: nounwind -define internal fastcc i32 @repeated_dup_worklist(%struct.s1** %pp1, %struct.s2* %p2, i32 %state, i1 %i1_1, i32 %i32_1) unnamed_addr #0 { -entry: - br label %while.cond.outer - -; The loop gets laid out: -; %while.cond.outer -; %(null) -; %(null) -; %dup2 -; and then %dup1 gets chosen as the next block. -; when dup2 is duplicated into dup1, %worklist could erroneously be placed on -; the worklist, because all of its current predecessors are now scheduled. 
-; However, after dup2 is tail-duplicated, %worklist can't be on the worklist -; because it now has unscheduled predecessors.q -; CHECK-LABEL: repeated_dup_worklist -; CHECK: // %entry -; CHECK: // %while.cond.outer -; first %(null) block -; CHECK: // in Loop: -; CHECK: ldr -; CHECK-NEXT: tbnz -; second %(null) block -; CHECK: // in Loop: -; CHECK: // %dup2 -; CHECK: // %worklist -; CHECK: // %if.then96.i -while.cond.outer: ; preds = %dup1, %entry - %progress.0.ph = phi i32 [ 0, %entry ], [ %progress.1, %dup1 ] - %inc77 = add nsw i32 %progress.0.ph, 1 - %cmp = icmp slt i32 %progress.0.ph, %i32_1 - br i1 %cmp, label %dup2, label %dup1 - -dup2: ; preds = %if.then96.i, %worklist, %while.cond.outer - %progress.1.ph = phi i32 [ 0, %while.cond.outer ], [ %progress.1, %if.then96.i ], [ %progress.1, %worklist ] - %.pr = load %struct.s1*, %struct.s1** %pp1, align 8 - br label %dup1 - -dup1: ; preds = %dup2, %while.cond.outer - %0 = phi %struct.s1* [ %.pr, %dup2 ], [ undef, %while.cond.outer ] - %progress.1 = phi i32 [ %progress.1.ph, %dup2 ], [ %inc77, %while.cond.outer ] - br i1 %i1_1, label %while.cond.outer, label %worklist - -worklist: ; preds = %dup1 - %snode94 = getelementptr inbounds %struct.s1, %struct.s1* %0, i64 0, i32 0 - %1 = load %struct.s3*, %struct.s3** %snode94, align 8 - %2 = getelementptr inbounds %struct.s3, %struct.s3* %1, i32 0, i32 0 - %3 = load i32, i32* %2, align 4 - %tobool95.i = icmp eq i32 %3, 0 - br i1 %tobool95.i, label %if.then96.i, label %dup2 - -if.then96.i: ; preds = %worklist - call fastcc void @free_s3(%struct.s2* %p2, %struct.s3* %1) #1 - br label %dup2 -} - -; Function Attrs: nounwind -declare fastcc void @free_s3(%struct.s2*, %struct.s3*) unnamed_addr #0 - -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll index 83c7676..15b7e94 100644 --- a/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll +++ b/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll @@ -25,6 +25,7 @@ bb1: ; preds = %bb br label %bb2 bb2: ; preds = %bb1, %entry +; CHECK: bb2 ; CHECK: cmp [[REG]], #0 ; CHECK: ble %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] diff --git a/llvm/test/CodeGen/PowerPC/branch-opt.ll b/llvm/test/CodeGen/PowerPC/branch-opt.ll index e714972..b3c0dba 100644 --- a/llvm/test/CodeGen/PowerPC/branch-opt.ll +++ b/llvm/test/CodeGen/PowerPC/branch-opt.ll @@ -1,21 +1,9 @@ -; RUN: llc -verify-machineinstrs < %s -march=ppc32 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -march=ppc32 | \ +; RUN: grep "b LBB.*" | count 4 target datalayout = "E-p:32:32" target triple = "powerpc-apple-darwin8.7.0" -;CHECK-LABEL: foo: -; There are 4 inner loops (%bb, %bb12, %bb25, %bb38) that all exit to %cond_next48 -; The last (whichever it is) should have a fallthrough exit, and the other three -; need an unconditional branch. No other block should have an unconditional -; branch to cond_next48 -; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy -; of %cond_next48, so there should only be two unconditional branches. 
- -;CHECK: b LBB0_13 -;CHECK: b LBB0_13 -;CHECK-NOT: b LBB0_13 -;CHECK: LBB0_13: ; %cond_next48 - define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) { entry: %tmp1 = and i32 %W, 1 ; [#uses=1] diff --git a/llvm/test/CodeGen/PowerPC/sjlj.ll b/llvm/test/CodeGen/PowerPC/sjlj.ll index f86f534..7fe3138 100644 --- a/llvm/test/CodeGen/PowerPC/sjlj.ll +++ b/llvm/test/CodeGen/PowerPC/sjlj.ll @@ -74,24 +74,24 @@ return: ; preds = %if.end, %if.then ; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill ; CHECK-DAG: std 1, 16([[REGA]]) ; CHECK-DAG: std 2, 24([[REGA]]) -; CHECK: bcl 20, 31, .LBB1_3 +; CHECK: bcl 20, 31, .LBB1_5 ; CHECK: li 3, 1 -; CHECK: #EH_SjLj_Setup .LBB1_3 +; CHECK: #EH_SjLj_Setup .LBB1_5 ; CHECK: b .LBB1_1 -; CHECK: .LBB1_3: -; CHECK: mflr [[REGL:[0-9]+]] -; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload -; CHECK: std [[REGL]], 8([[REG2]]) -; CHECK: li 3, 0 - -; CHECK: .LBB1_5: +; CHECK: .LBB1_4: ; CHECK: lfd ; CHECK: lxvd2x ; CHECK: ld ; CHECK: blr +; CHECK: .LBB1_5: +; CHECK: mflr [[REGL:[0-9]+]] +; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload +; CHECK: std [[REGL]], 8([[REG2]]) +; CHECK: li 3, 0 + ; CHECK-NOAV: @main ; CHECK-NOAV-NOT: stxvd2x ; CHECK-NOAV: bcl diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-branch-to-fallthrough.ll b/llvm/test/CodeGen/PowerPC/tail-dup-branch-to-fallthrough.ll deleted file mode 100644 index 5d03af8..0000000 --- a/llvm/test/CodeGen/PowerPC/tail-dup-branch-to-fallthrough.ll +++ /dev/null @@ -1,65 +0,0 @@ -; RUN: llc -O2 %s -o - | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -; Function Attrs: nounwind -declare void @llvm.lifetime.end(i64, i8* nocapture) #0 - -declare void @f1() -declare void @f2() -declare void @f3() -declare void @f4() - -; Function Attrs: nounwind -; CHECK-LABEL: tail_dup_fallthrough_with_branch -; CHECK: # %entry -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %entry -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} 
-; CHECK: # %sw.0 -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %sw.1 -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %sw.default -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %if.then -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %if.else -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: .Lfunc_end0 -define fastcc void @tail_dup_fallthrough_with_branch(i32 %a, i1 %b) unnamed_addr #0 { -entry: - switch i32 %a, label %sw.default [ - i32 0, label %sw.0 - i32 1, label %sw.1 - ] - -sw.0: ; preds = %entry - call void @f1() #0 - br label %dup1 - -sw.1: ; preds = %entry - call void @f2() #0 - br label %dup1 - -sw.default: ; preds = %entry - br i1 %b, label %if.then, label %if.else - -if.then: ; preds = %sw.default - call void @f3() #0 - br label %dup2 - -if.else: ; preds = %sw.default - call void @f4() #0 - br label %dup2 - -dup1: ; preds = %sw.0, %sw.1 - call void @llvm.lifetime.end(i64 8, i8* nonnull undef) #0 - unreachable - -dup2: ; preds = %if.then, %if.else - call void @llvm.lifetime.end(i64 8, i8* nonnull undef) #0 - unreachable -} - -attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll deleted file mode 100644 index 6790aa8..0000000 --- a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll +++ /dev/null @@ -1,100 +0,0 @@ -; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-n32:64" -target triple = "powerpc64le-grtev4-linux-gnu" - -; Intended layout: -; The outlining flag produces the layout -; test1 -; test2 -; test3 -; test4 -; exit -; optional1 -; optional2 -; optional3 -; optional4 -; Tail duplication puts test n+1 at the end of optional n -; so optional1 includes a copy of test2 at the end, and branches -; to test3 (at the top) or falls through to optional 2. 
-; The CHECK statements check for the whole string of tests and exit block, -; and then check that the correct test has been duplicated into the end of -; the optional blocks and that the optional blocks are in the correct order. -;CHECK-LABEL: f: -; test1 may have been merged with entry -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit -;CHECK: blr -;CHECK-NEXT: [[OPT1LABEL]] -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: beq 0, [[TEST3LABEL]] -;CHECK-NEXT: [[OPT2LABEL]] -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: beq 0, [[TEST4LABEL]] -;CHECK-NEXT: [[OPT3LABEL]] -;CHECK: rlwinm. 
{{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: beq 0, [[EXITLABEL]] -;CHECK-NEXT: [[OPT4LABEL]] -;CHECK: b [[EXITLABEL]] - -define void @f(i32 %tag) { -entry: - br label %test1 -test1: - %tagbit1 = and i32 %tag, 1 - %tagbit1eq0 = icmp eq i32 %tagbit1, 0 - br i1 %tagbit1eq0, label %test2, label %optional1 -optional1: - call void @a() - call void @a() - call void @a() - call void @a() - br label %test2 -test2: - %tagbit2 = and i32 %tag, 2 - %tagbit2eq0 = icmp eq i32 %tagbit2, 0 - br i1 %tagbit2eq0, label %test3, label %optional2 -optional2: - call void @b() - call void @b() - call void @b() - call void @b() - br label %test3 -test3: - %tagbit3 = and i32 %tag, 4 - %tagbit3eq0 = icmp eq i32 %tagbit3, 0 - br i1 %tagbit3eq0, label %test4, label %optional3 -optional3: - call void @c() - call void @c() - call void @c() - call void @c() - br label %test4 -test4: - %tagbit4 = and i32 %tag, 8 - %tagbit4eq0 = icmp eq i32 %tagbit4, 0 - br i1 %tagbit4eq0, label %exit, label %optional4 -optional4: - call void @d() - call void @d() - call void @d() - call void @d() - br label %exit -exit: - ret void -} - -declare void @a() -declare void @b() -declare void @c() -declare void @d() diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll index fcdbd7f..eb1f236 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false | FileCheck %s -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s ; Test the CFG stackifier pass. 
diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll index 0ac1e1e..71787fe 100644 --- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0| FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s ; Test memcpy, memmove, and memset intrinsics. diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index 39e2964..b690316 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -177,12 +177,6 @@ exit: ret i32 %sum } -; Tail duplication during layout can entirely remove body0 by duplicating it -; into the entry block and into body1. This is a good thing but it isn't what -; this test is looking for. So to make the blocks longer so they don't get -; duplicated, we add some calls to dummy. -declare void @dummy() - define i32 @test_loop_rotate(i32 %i, i32* %a) { ; Check that we rotate conditional exits from the loop to the bottom of the ; loop, eliminating unconditional branches to the top. @@ -200,8 +194,6 @@ body0: %base = phi i32 [ 0, %entry ], [ %sum, %body1 ] %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i - call void @dummy() - call void @dummy() br i1 %exitcond, label %exit, label %body1 body1: @@ -953,7 +945,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { ; First rotated loop top. ; CHECK: .p2align ; CHECK: %while.end -; %for.cond gets completely tail-duplicated away. 
+; CHECK: %for.cond ; CHECK: %if.then ; CHECK: %if.else ; CHECK: %if.end10 diff --git a/llvm/test/CodeGen/X86/cmov-into-branch.ll b/llvm/test/CodeGen/X86/cmov-into-branch.ll index 6e4762b..c0c6fc4 100644 --- a/llvm/test/CodeGen/X86/cmov-into-branch.ll +++ b/llvm/test/CodeGen/X86/cmov-into-branch.ll @@ -105,11 +105,9 @@ define i32 @weighted_select3(i32 %a, i32 %b) { ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: je [[LABEL_BB6:.*]] ; CHECK: movl %edi, %eax -; CHECK-NEXT: retq ; CHECK: [[LABEL_BB6]] ; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: retq +; CHECK-NEXT: jmp ; %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2 diff --git a/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll b/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll index 78e7471..8d0318b 100644 --- a/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll +++ b/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll @@ -2,7 +2,7 @@ ; CHECK-LABEL: fmaddsubpd_loop_128: ; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2 -; CHECK: vmovapd %xmm2, %xmm0 +; CHECK: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) { entry: @@ -28,7 +28,7 @@ for.end: ; CHECK-LABEL: fmsubaddpd_loop_128: ; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2 -; CHECK: vmovapd %xmm2, %xmm0 +; CHECK: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) { entry: @@ -54,7 +54,7 @@ for.end: ; CHECK-LABEL: fmaddpd_loop_128: ; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2 -; CHECK: vmovapd %xmm2, %xmm0 +; CHECK: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) { entry: @@ -80,7 +80,7 @@ for.end: ; CHECK-LABEL: fmsubpd_loop_128: ; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2 -; CHECK: vmovapd %xmm2, %xmm0 +; CHECK: vmovaps %xmm2, %xmm0 ; 
CHECK-NEXT: retq define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) { entry: @@ -106,7 +106,7 @@ for.end: ; CHECK-LABEL: fnmaddpd_loop_128: ; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2 -; CHECK: vmovapd %xmm2, %xmm0 +; CHECK: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) { entry: @@ -132,7 +132,7 @@ for.end: ; CHECK-LABEL: fnmsubpd_loop_128: ; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2 -; CHECK: vmovapd %xmm2, %xmm0 +; CHECK: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) { entry: @@ -329,7 +329,7 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa ; CHECK-LABEL: fmaddsubpd_loop_256: ; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2 -; CHECK: vmovapd %ymm2, %ymm0 +; CHECK: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { entry: @@ -355,7 +355,7 @@ for.end: ; CHECK-LABEL: fmsubaddpd_loop_256: ; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2 -; CHECK: vmovapd %ymm2, %ymm0 +; CHECK: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { entry: @@ -381,7 +381,7 @@ for.end: ; CHECK-LABEL: fmaddpd_loop_256: ; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2 -; CHECK: vmovapd %ymm2, %ymm0 +; CHECK: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { entry: @@ -407,7 +407,7 @@ for.end: ; CHECK-LABEL: fmsubpd_loop_256: ; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2 -; CHECK: vmovapd %ymm2, %ymm0 +; CHECK: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { entry: @@ -433,7 +433,7 @@ 
for.end: ; CHECK-LABEL: fnmaddpd_loop_256: ; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2 -; CHECK: vmovapd %ymm2, %ymm0 +; CHECK: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { entry: @@ -459,7 +459,7 @@ for.end: ; CHECK-LABEL: fnmsubpd_loop_256: ; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2 -; CHECK: vmovapd %ymm2, %ymm0 +; CHECK: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { entry: diff --git a/llvm/test/CodeGen/X86/fp-une-cmp.ll b/llvm/test/CodeGen/X86/fp-une-cmp.ll index e3b2a04..6530400 100644 --- a/llvm/test/CodeGen/X86/fp-une-cmp.ll +++ b/llvm/test/CodeGen/X86/fp-une-cmp.ll @@ -56,11 +56,11 @@ define double @profile_metadata(double %x, double %y) { ; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; CHECK-NEXT: jne .LBB1_1 ; CHECK-NEXT: jp .LBB1_1 -; CHECK-NEXT: # %bb2 +; CHECK-NEXT: .LBB1_2: # %bb2 ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_1: # %bb1 ; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: jmp .LBB1_2 entry: %mul = fmul double %x, %y diff --git a/llvm/test/CodeGen/X86/pr11202.ll b/llvm/test/CodeGen/X86/pr11202.ll index cb1a749..13070d1 100644 --- a/llvm/test/CodeGen/X86/pr11202.ll +++ b/llvm/test/CodeGen/X86/pr11202.ll @@ -15,8 +15,5 @@ l2: ; preds = %l1 br label %l1 } -; It is correct for either l1 or l2 to be removed. -; If l2 is removed, the message should be "Address of block that was removed by CodeGen" -; If l1 is removed, it should be "Block address taken." 
-; CHECK: .Ltmp0: # {{Address of block that was removed by CodeGen|Block address taken}} +; CHECK: .Ltmp0: # Address of block that was removed by CodeGen ; CHECK: .quad .Ltmp0 diff --git a/llvm/test/CodeGen/X86/ragreedy-bug.ll b/llvm/test/CodeGen/X86/ragreedy-bug.ll index bfeb041..e842631 100644 --- a/llvm/test/CodeGen/X86/ragreedy-bug.ll +++ b/llvm/test/CodeGen/X86/ragreedy-bug.ll @@ -3,34 +3,16 @@ ; This testing case is reduced from 197.parser prune_match function. ; We make sure register copies are not generated on isupper.exit blocks. -; isupper.exit and isupper.exit223 get tail-duplicated into all their -; predecessors. -; CHECK: cond.true.i.i +; CHECK: isupper.exit ; CHECK-NEXT: in Loop -; Mem-move -; CHECK-NEXT: movl -; CHECK-NEXT: andl ; CHECK-NEXT: testl ; CHECK-NEXT: jne -; CHECK: cond.true.i.i217 +; CHECK: isupper.exit ; CHECK-NEXT: in Loop -; Mem-move -; CHECK-NEXT: movl -; CHECK-NEXT: andl ; CHECK-NEXT: testl ; CHECK-NEXT: je -; CHECK: cond.false.i.i ; CHECK: maskrune -; CHECK-NEXT: movzbl -; CHECK-NEXT: movzbl -; CHECK-NEXT: testl -; CHECK-NEXT: je -; CHECK: cond.false.i.i219 ; CHECK: maskrune -; CHECK-NEXT: movzbl -; CHECK-NEXT: movzbl -; CHECK-NEXT: testl -; CHECK-NEXT: jne %struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* } %struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* } diff --git a/llvm/test/CodeGen/X86/sse1.ll b/llvm/test/CodeGen/X86/sse1.ll index beedb1d..3ac6ea6 100644 --- a/llvm/test/CodeGen/X86/sse1.ll +++ b/llvm/test/CodeGen/X86/sse1.ll @@ -58,23 +58,21 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X32-NEXT: je .LBB1_1 ; X32-NEXT: # BB#2: # %entry ; X32-NEXT: xorps %xmm1, %xmm1 -; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X32-NEXT: jne .LBB1_5 -; X32-NEXT: jmp .LBB1_4 +; X32-NEXT: jmp .LBB1_3 ; X32-NEXT: .LBB1_1: ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-NEXT: .LBB1_3: # %entry ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X32-NEXT: je .LBB1_4 
-; X32-NEXT: .LBB1_5: # %entry +; X32-NEXT: # BB#5: # %entry ; X32-NEXT: xorps %xmm2, %xmm2 -; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X32-NEXT: jne .LBB1_8 -; X32-NEXT: jmp .LBB1_7 +; X32-NEXT: jmp .LBB1_6 ; X32-NEXT: .LBB1_4: ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X32-NEXT: .LBB1_6: # %entry ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X32-NEXT: je .LBB1_7 -; X32-NEXT: .LBB1_8: # %entry +; X32-NEXT: # BB#8: # %entry ; X32-NEXT: xorps %xmm3, %xmm3 ; X32-NEXT: jmp .LBB1_9 ; X32-NEXT: .LBB1_7: @@ -97,23 +95,21 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X64-NEXT: je .LBB1_1 ; X64-NEXT: # BB#2: # %entry ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: testl %edx, %edx -; X64-NEXT: jne .LBB1_5 -; X64-NEXT: jmp .LBB1_4 +; X64-NEXT: jmp .LBB1_3 ; X64-NEXT: .LBB1_1: ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: .LBB1_3: # %entry ; X64-NEXT: testl %edx, %edx ; X64-NEXT: je .LBB1_4 -; X64-NEXT: .LBB1_5: # %entry +; X64-NEXT: # BB#5: # %entry ; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: testl %r8d, %r8d -; X64-NEXT: jne .LBB1_8 -; X64-NEXT: jmp .LBB1_7 +; X64-NEXT: jmp .LBB1_6 ; X64-NEXT: .LBB1_4: ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: .LBB1_6: # %entry ; X64-NEXT: testl %r8d, %r8d ; X64-NEXT: je .LBB1_7 -; X64-NEXT: .LBB1_8: # %entry +; X64-NEXT: # BB#8: # %entry ; X64-NEXT: xorps %xmm3, %xmm3 ; X64-NEXT: jmp .LBB1_9 ; X64-NEXT: .LBB1_7: diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll deleted file mode 100644 index 2c8c05b..0000000 --- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ /dev/null @@ -1,190 +0,0 @@ -; RUN: llc -O2 -o - %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind uwtable -; CHECK-LABEL: tail_dup_merge_loops -; CHECK: # %entry -; CHECK-NOT: # %{{[a-zA-Z_]+}} -; CHECK: # %inner_loop_exit -; 
CHECK-NOT: # %{{[a-zA-Z_]+}} -; CHECK: # %inner_loop_latch -; CHECK-NOT: # %{{[a-zA-Z_]+}} -; CHECK: # %inner_loop_test -; CHECK-NOT: # %{{[a-zA-Z_]+}} -; CHECK: # %exit -define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 { -entry: - %notlhs674.i = icmp eq i32 %a, 0 - br label %outer_loop_top - -outer_loop_top: ; preds = %inner_loop_exit, %entry - %dst.0.ph.i = phi i8* [ %b, %entry ], [ %scevgep679.i, %inner_loop_exit ] - br i1 %notlhs674.i, label %exit, label %inner_loop_preheader - -inner_loop_preheader: ; preds = %outer_loop_top - br label %inner_loop_top - -inner_loop_top: ; preds = %inner_loop_latch, %inner_loop_preheader - %dst.0.i = phi i8* [ %inc, %inner_loop_latch ], [ %dst.0.ph.i, %inner_loop_preheader ] - %var = load i8, i8* %dst.0.i - %tobool1.i = icmp slt i8 %var, 0 - br label %inner_loop_test - -inner_loop_test: ; preds = %inner_loop_top - br i1 %tobool1.i, label %inner_loop_exit, label %inner_loop_latch - -inner_loop_exit: ; preds = %inner_loop_test - %scevgep.i = getelementptr i8, i8* %dst.0.i, i64 1 - %scevgep679.i = getelementptr i8, i8* %scevgep.i, i64 0 - br label %outer_loop_top - -inner_loop_latch: ; preds = %inner_loop_test - %cmp75.i = icmp ult i8* %dst.0.i, %c - %inc = getelementptr i8, i8* %dst.0.i, i64 2 - br label %inner_loop_top - -exit: ; preds = %outer_loop_top - ret void -} - -@.str.6 = external unnamed_addr constant [23 x i8], align 1 - -; There is an erroneus check in LoopBase::addBasicBlockToLoop(), where it -; assumes that the header block for a loop is unique. -; For most of compilation this assumption is true, but during layout we allow -; this assumption to be violated. The following code will trigger the bug: - -; The loops in question is eventually headed by the block shared_loop_header -; -; During layout The block labeled outer_loop_header gets tail-duplicated into -; outer_loop_latch, and into shared_preheader, and then removed. This leaves -; shared_loop_header as the header of both loops. 
The end result -; is that there are 2 valid loops, and that they share a header. If we re-ran -; the loop analysis, it would classify this as a single loop. -; So far this is fine as far as layout is concerned. -; After layout we tail merge blocks merge_other and merge_predecessor_split. -; We do this even though they share only a single instruction, because -; merge_predecessor_split falls through to their shared successor: -; outer_loop_latch. -; The rest of the blocks in the function are noise unfortunately. Bugpoint -; couldn't shrink the test any further. - -; CHECK-LABEL: loop_shared_header -; CHECK: # %entry -; CHECK: # %shared_preheader -; CHECK: # %shared_loop_header -; CHECK: # %inner_loop_body -; CHECK: # %merge_predecessor_split -; CHECK: # %outer_loop_latch -; CHECK: # %outer_loop_latch -; CHECK: # %cleanup -define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i32 %wwprva, i32 %e_lfanew, i8* readonly %wwp, i32 %wwpsz, i16 zeroext %sects) local_unnamed_addr #0 { -entry: - %0 = load i32, i32* undef, align 4 - %mul = shl nsw i32 %0, 2 - br i1 undef, label %if.end19, label %cleanup - -if.end19: ; preds = %entry - %conv = zext i32 %mul to i64 - %call = tail call i8* @cli_calloc(i64 %conv, i64 1) - %1 = icmp eq i32 %exesz, 0 - %notrhs = icmp eq i32 %0, 0 - %or.cond117.not = or i1 %1, %notrhs - %or.cond202 = or i1 %or.cond117.not, undef - %cmp35 = icmp ult i8* undef, %exe - %or.cond203 = or i1 %or.cond202, %cmp35 - br i1 %or.cond203, label %cleanup, label %if.end50 - -if.end50: ; preds = %if.end19 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %call, i8* undef, i64 %conv, i32 1, i1 false) - %cmp1.i.i = icmp ugt i32 %mul, 3 - br i1 %cmp1.i.i, label %shared_preheader, label %wunpsect.exit.thread.loopexit391 - -shared_preheader: ; preds = %if.end50 - br label %outer_loop_header - -outer_loop_header: ; preds = %outer_loop_latch, %shared_preheader - %bits.1.i = phi i8 [ 32, %shared_preheader ], [ %bits.43.i, %outer_loop_latch ] - 
%dst.0.ph.i = phi i8* [ undef, %shared_preheader ], [ %scevgep679.i, %outer_loop_latch ] - %2 = icmp eq i32 undef, 0 - br i1 %2, label %while.cond.us1412.i, label %shared_loop_header - -while.cond.us1412.i: ; preds = %outer_loop_header - %.pre.i = add i8 %bits.1.i, -1 - %tobool2.us1420.i = icmp eq i8 %.pre.i, 0 - %or.cond.us1421.i = or i1 undef, %tobool2.us1420.i - br i1 %or.cond.us1421.i, label %if.end41.us1436.i, label %cleanup - -if.end41.us1436.i: ; preds = %while.cond.us1412.i - unreachable - -shared_loop_header: ; preds = %dup_early2, %dup_early1 - %dst.0.i = phi i8* [ undef, %inner_loop_body ], [ %dst.0.ph.i, %outer_loop_header ], [ undef, %dead_block ] - %cmp3.i1172.i = icmp ult i8* undef, %call - br i1 %cmp3.i1172.i, label %wunpsect.exit.thread.loopexit389, label %inner_loop_body - -inner_loop_body: ; preds = %shared_loop_header - %3 = icmp slt i32 undef, 0 - br i1 %3, label %if.end96.i, label %shared_loop_header - -dead_block: ; preds = %inner_loop_body - %cmp75.i = icmp ult i8* %dst.0.i, undef - br label %shared_loop_header - -if.end96.i: ; preds = %inner_loop_body - %cmp97.i = icmp ugt i32 undef, 2 - br i1 %cmp97.i, label %if.then99.i, label %if.end287.i - -if.then99.i: ; preds = %if.end96.i - tail call void (i8*, ...) 
@cli_dbgmsg(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.6, i64 0, i64 0), i32 undef) - br label %cleanup - -if.end287.i: ; preds = %if.end96.i - %cmp291.i = icmp ne i32 undef, 1 - %conv294.i = select i1 %cmp291.i, i16 4, i16 3 - br i1 undef, label %if.end308.i, label %outer_loop_latch - -if.end308.i: ; preds = %if.end287.i - br i1 undef, label %if.end335.i, label %merge_predecessor_split - -merge_predecessor_split: ; preds = %if.end308.i - %4 = bitcast i8* undef to i32* - br label %outer_loop_latch - -if.end335.i: ; preds = %if.end308.i - br i1 undef, label %outer_loop_latch, label %merge_other - -merge_other: ; preds = %if.end335.i - br label %outer_loop_latch - -outer_loop_latch: ; preds = %merge_other, %if.end335.i, %merge_predecessor_split, %if.end287.i - %bits.43.i = phi i8 [ undef, %if.end287.i ], [ undef, %merge_other ], [ 32, %merge_predecessor_split ], [ 0, %if.end335.i ] - %backsize.0.i = phi i16 [ %conv294.i, %if.end287.i ], [ 0, %merge_other ], [ 0, %merge_predecessor_split ], [ 0, %if.end335.i ] - %5 = add i16 %backsize.0.i, -1 - %6 = zext i16 %5 to i64 - %scevgep.i = getelementptr i8, i8* %dst.0.ph.i, i64 1 - %scevgep679.i = getelementptr i8, i8* %scevgep.i, i64 %6 - br label %outer_loop_header - -wunpsect.exit.thread.loopexit389: ; preds = %shared_loop_header - unreachable - -wunpsect.exit.thread.loopexit391: ; preds = %if.end50 - unreachable - -cleanup: ; preds = %if.then99.i, %while.cond.us1412.i, %if.end19, %entry - %retval.0 = phi i32 [ 0, %if.then99.i ], [ 1, %entry ], [ 1, %if.end19 ], [ 1, %while.cond.us1412.i ] - ret i32 %retval.0 -} - -; Function Attrs: nounwind -declare void @cli_dbgmsg(i8*, ...) 
local_unnamed_addr #0 - -; Function Attrs: nounwind -declare i8* @cli_calloc(i64, i64) local_unnamed_addr #0 - -; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 -attributes #0 = { nounwind } -attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/X86/tail-dup-repeat.ll b/llvm/test/CodeGen/X86/tail-dup-repeat.ll deleted file mode 100644 index 21b48e1..0000000 --- a/llvm/test/CodeGen/X86/tail-dup-repeat.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: llc -O2 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: uwtable -; When tail-duplicating during placement, we work backward from blocks with -; multiple successors. In this case, the block dup1 gets duplicated into dup2 -; and if.then64, and then the block dup2 gets duplicated into land.lhs.true -; and if.end70 -; CHECK-LABEL: repeated_tail_dup: -define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6) #0 align 2 { -entry: - br label %for.cond - -; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %for.cond -for.cond: ; preds = %dup1, %entry - br i1 %a1, label %land.lhs.true, label %if.end56 - -land.lhs.true: ; preds = %for.cond - store i32 10, i32* %a4, align 8 - br label %dup2 - -if.end56: ; preds = %for.cond - br i1 %a2, label %if.then64, label %if.end70 - -if.then64: ; preds = %if.end56 - store i8 1, i8* %a6, align 1 - br label %dup1 - -; CHECK: # %if.end70 -; CHECK-NEXT: # in Loop: -; CHECK-NEXT: movl $12, (%rdx) -; CHECK-NEXT: movl $2, (%rcx) -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .[[HEADER]] -if.end70: ; preds = %if.end56 - store i32 12, i32* %a4, align 8 - br label %dup2 - -dup2: ; preds = %if.end70, %land.lhs.true - store i32 2, i32* %a5, align 4 - br label %dup1 - -dup1: ; preds = %dup2, %if.then64 - %val = load i32, i32* %a4, align 8 - %switch = icmp ult i32 
undef, 1 - br i1 %switch, label %for.cond, label %for.end - -for.end: ; preds = %dup1 - ret void -} - -attributes #0 = { uwtable } diff --git a/llvm/test/CodeGen/X86/update-terminator.mir b/llvm/test/CodeGen/X86/update-terminator.mir index 2e8e85b..1e75c6a 100644 --- a/llvm/test/CodeGen/X86/update-terminator.mir +++ b/llvm/test/CodeGen/X86/update-terminator.mir @@ -5,30 +5,17 @@ @a = external global i16 @b = external global i32 - declare void @dummy1() - declare void @dummy2() - declare void @dummy3() - ; Function Attrs: nounwind define void @f2() { br i1 undef, label %bb1, label %bb3 bb1: - call void @dummy1() - call void @dummy1() - call void @dummy1() br i1 undef, label %bb2, label %bb2 bb2: - call void @dummy2() - call void @dummy2() - call void @dummy2() br label %bb4 bb3: - call void @dummy3() - call void @dummy3() - call void @dummy3() br label %bb2 bb4: @@ -53,24 +40,15 @@ body: | bb.1: successors: %bb.2(100) - CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp - CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp - CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp JNE_1 %bb.2, implicit %eflags bb.2: successors: %bb.4(100) - CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp - CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp - CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp JMP_1 %bb.4 bb.3: successors: %bb.2(100) - CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp - CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp - CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp JMP_1 %bb.2 bb.4: