#ifndef LLVM_CODEGEN_TAILDUPLICATOR_H
#define LLVM_CODEGEN_TAILDUPLICATOR_H
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
MachineRegisterInfo *MRI;
MachineFunction *MF;
bool PreRegAlloc;
- bool LayoutMode;
unsigned TailDupSize;
// A list of virtual registers for which to update SSA form.
public:
/// Prepare to run on a specific machine function.
- /// @param MF - Function that will be processed
- /// @param MBPI - Branch Probability Info. Used to propagate correct
- /// probabilities when modifying the CFG.
- /// @param LayoutMode - When true, don't use the existing layout to make
- /// decisions.
- /// @param TailDupSize - Maxmimum size of blocks to tail-duplicate. Zero
- /// default implies using the command line value TailDupSize.
+ /// @param TailDupSize - Maxmimum size of blocks to tail-duplicate.
void initMF(MachineFunction &MF,
const MachineBranchProbabilityInfo *MBPI,
- bool LayoutMode, unsigned TailDupSize = 0);
+ unsigned TailDupSize = 0);
bool tailDuplicateBlocks();
static bool isSimpleBB(MachineBasicBlock *TailBB);
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB);
/// up.
/// If \p DuplicatePreds is not null, it will be updated to contain the list
/// of predecessors that received a copy of \p MBB.
- /// If \p RemovalCallback is non-null. It will be called before MBB is
- /// deleted.
bool tailDuplicateAndUpdate(
bool IsSimple, MachineBasicBlock *MBB,
- MachineBasicBlock *ForcedLayoutPred,
- SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds = nullptr,
- llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
+ SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds = nullptr);
private:
typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
const DenseSet<unsigned> &RegsUsedByPhi,
SmallVectorImpl<MachineInstr *> &Copies);
- bool tailDuplicate(bool IsSimple,
- MachineBasicBlock *TailBB,
- MachineBasicBlock *ForcedLayoutPred,
+ bool tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies);
void appendCopies(MachineBasicBlock *MBB,
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
SmallVectorImpl<MachineInstr *> &Copies);
- void removeDeadBlock(
- MachineBasicBlock *MBB,
- llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
+ void removeDeadBlock(MachineBasicBlock *MBB);
};
} // End llvm namespace
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
-static cl::opt<bool>
-TailDupPlacement("tail-dup-placement",
- cl::desc("Perform tail duplication during placement. "
- "Creates more fallthrough opportunites in "
- "outline branches."),
- cl::init(true), cl::Hidden);
static cl::opt<bool>
BranchFoldPlacement("branch-fold-placement",
"Reduces code size."),
cl::init(true), cl::Hidden);
-// Heuristic for tail duplication.
-static cl::opt<unsigned> TailDuplicatePlacementThreshold(
- "tail-dup-placement-threshold",
- cl::desc("Instruction cutoff for tail duplication during layout. "
- "Tail merging during layout is forced to have a threshold "
- "that won't conflict."), cl::init(2),
- cl::Hidden);
-
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
/// \brief End of blocks within the chain.
iterator end() { return Blocks.end(); }
- bool remove(MachineBasicBlock* BB) {
- for(iterator i = begin(); i != end(); ++i) {
- if (*i == BB) {
- Blocks.erase(i);
- return true;
- }
- }
- return false;
- }
-
/// \brief Merge a block chain into this one.
///
/// This routine merges a block chain into this one. It takes care of forming
/// \brief A handle to the post dominator tree.
MachineDominatorTree *MDT;
- /// \brief Duplicator used to duplicate tails during placement.
- ///
- /// Placement decisions can open up new tail duplication opportunities, but
- /// since tail duplication affects placement decisions of later blocks, it
- /// must be done inline.
- TailDuplicator TailDup;
-
/// \brief A set of blocks that are unavoidably execute, i.e. they dominate
/// all terminators of the MachineFunction.
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
- /// Decrease the UnscheduledPredecessors count for all blocks in chain, and
- /// if the count goes to 0, add them to the appropriate work list.
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter = nullptr);
-
- /// Decrease the UnscheduledPredecessors count for a single block, and
- /// if the count goes to 0, add them to the appropriate work list.
- void markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter = nullptr);
-
-
BranchProbability
collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
const BlockFilterSet *BlockFilter,
const BlockFilterSet *BlockFilter,
BranchProbability SuccProb,
BranchProbability HotProb);
- bool repeatedlyTailDuplicateBlock(
- MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
- BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt);
- bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain,
- BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- bool &DuplicatedToPred);
bool
hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
BlockChain &SuccChain, BranchProbability SuccProb,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- BlockFilterSet *BlockFilter = nullptr);
+ const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineLoop &L,
/// When a chain is being merged into the "placed" chain, this routine will
/// quickly walk the successors of each block in the chain and mark them as
/// having one fewer active predecessor. It also adds any successors of this
-/// chain which reach the zero-predecessor state to the appropriate worklist.
+/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
for (MachineBasicBlock *MBB : Chain) {
- markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
- }
-}
-
-/// \brief Mark a single block's successors as having one fewer preds.
-///
-/// Under normal circumstances, this is only called by markChainSuccessors,
-/// but if a block that was to be placed is completely tail-duplicated away,
-/// and was duplicated into the chain end, we need to redo markBlockSuccessors
-/// for just that block.
-void MachineBlockPlacement::markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter) {
- // Add any successors for which this is the only un-placed in-loop
- // predecessor to the worklist as a viable candidate for CFG-neutral
- // placement. No subsequent placement of this block will violate the CFG
- // shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock *Succ : MBB->successors()) {
- if (BlockFilter && !BlockFilter->count(Succ))
- continue;
- BlockChain &SuccChain = *BlockToChain[Succ];
- // Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || Succ == LoopHeaderBB)
- continue;
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
+ continue;
- // This is a cross-chain edge that is within the loop, so decrement the
- // loop predecessor count of the destination chain.
- if (SuccChain.UnscheduledPredecessors == 0 ||
- --SuccChain.UnscheduledPredecessors > 0)
- continue;
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
- auto *MBB = *SuccChain.begin();
- if (MBB->isEHPad())
- EHPadWorkList.push_back(MBB);
- else
- BlockWorkList.push_back(MBB);
+ auto *MBB = *SuccChain.begin();
+ if (MBB->isEHPad())
+ EHPadWorkList.push_back(MBB);
+ else
+ BlockWorkList.push_back(MBB);
+ }
}
}
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB, BlockChain &Chain,
- BlockFilterSet *BlockFilter) {
+ const BlockFilterSet *BlockFilter) {
assert(BB && "BB must not be null.\n");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
"layout successor until the CFG reduces\n");
}
- // Placement may have changed tail duplication opportunities.
- // Check for that now.
- if (TailDupPlacement && BestSucc) {
- // If the chosen successor was duplicated into all its predecessors,
- // don't bother laying it out, just go round the loop again with BB as
- // the chain end.
- if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
- BlockFilter, PrevUnplacedBlockIt))
- continue;
- }
-
// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];
// Zero out UnscheduledPredecessors for the successor we're about to merge in case
}
}
-/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
-/// it was duplicated into its chain predecessor and removed.
-/// \p BB - Basic block that may be duplicated.
-///
-/// \p LPred - Chosen layout predecessor of \p BB.
-/// Updated to be the chain end if LPred is removed.
-/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
-/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
-/// Used to identify which blocks to update predecessor
-/// counts.
-/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
-/// chosen in the given order due to unnatural CFG
-/// only needed if \p BB is removed and
-/// \p PrevUnplacedBlockIt pointed to \p BB.
-/// @return true if \p BB was removed.
-bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
- MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
- BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt) {
- bool Removed, DuplicatedToLPred;
- Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
- PrevUnplacedBlockIt,
- DuplicatedToLPred);
- if (!Removed)
- return false;
- // If BB was duplicated into LPred, it is now scheduled. But because it was
- // removed, markChainSuccessors won't be called for its chain. Instead we call
- // markBlockSuccessors for LPred to achieve the same effect.
- if (DuplicatedToLPred)
- markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
- // Iteratively try to duplicate again. It can happen that a block that is
- // duplicated into is still small enough to be duplicated again.
- // No need to call markBlockSuccessors in this case, as the blocks being
- // duplicated from here on are already scheduled.
- // Note that DuplicatedToLPred always implies Removed.
- while (DuplicatedToLPred) {
- assert (Removed && "Block must have been removed to be duplicated into its "
- "layout predecessor.");
- MachineBasicBlock *DupBB, *DupPred;
- // The removal callback causes Chain.end() to be updated when a block is
- // removed. On the first pass through the loop, the chain end should be the
- // same as it was on function entry. On subsequent passes, because we are
- // duplicating the block at the end of the chain, if it is removed the
- // chain will have shrunk by one block.
- BlockChain::iterator ChainEnd = Chain.end();
- DupBB = *(--ChainEnd);
- // Now try to duplicate again.
- if (ChainEnd == Chain.begin())
- break;
- DupPred = *std::prev(ChainEnd);
- Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
- PrevUnplacedBlockIt,
- DuplicatedToLPred);
- }
- LPred = *std::prev(Chain.end());
- return true;
-}
-
-/// Tail duplicate \p BB into (some) predecessors if profitable.
-/// \p BB - Basic block that may be duplicated
-/// \p LPred - Chosen layout predecessor of \p BB
-/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
-/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
-/// Used to identify which blocks to update predecessor
-/// counts.
-/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
-/// chosen in the given order due to unnatural CFG
-/// only needed if \p BB is removed and
-/// \p PrevUnplacedBlockIt pointed to \p BB.
-/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
-/// only be true if the block was removed.
-/// \return - True if the block was duplicated into all preds and removed.
-bool MachineBlockPlacement::maybeTailDuplicateBlock(
- MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- bool &DuplicatedToLPred) {
-
- DuplicatedToLPred = false;
- DEBUG(dbgs() << "Redoing tail duplication for Succ#"
- << BB->getNumber() << "\n");
- bool IsSimple = TailDup.isSimpleBB(BB);
- // Blocks with single successors don't create additional fallthrough
- // opportunities. Don't duplicate them. TODO: When conditional exits are
- // analyzable, allow them to be duplicated.
- if (!IsSimple && BB->succ_size() == 1)
- return false;
- if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
- return false;
- // This has to be a callback because none of it can be done after
- // BB is deleted.
- bool Removed = false;
- auto RemovalCallback =
- [&](MachineBasicBlock *RemBB) {
- // Signal to outer function
- Removed = true;
-
- // Conservative default.
- bool InWorkList = true;
- // Remove from the Chain and Chain Map
- if (BlockToChain.count(RemBB)) {
- BlockChain *Chain = BlockToChain[RemBB];
- InWorkList = Chain->UnscheduledPredecessors == 0;
- Chain->remove(RemBB);
- BlockToChain.erase(RemBB);
- }
-
- // Handle the unplaced block iterator
- if (&(*PrevUnplacedBlockIt) == RemBB) {
- PrevUnplacedBlockIt++;
- }
-
- // Handle the Work Lists
- if (InWorkList) {
- SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
- if (RemBB->isEHPad())
- RemoveList = EHPadWorkList;
- RemoveList.erase(
- remove_if(RemoveList,
- [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
- RemoveList.end());
- }
-
- // Handle the filter set
- if (BlockFilter) {
- BlockFilter->erase(RemBB);
- }
-
- // Remove the block from loop info.
- MLI->removeBlock(RemBB);
-
- // TailDuplicator handles removing it from loops.
- DEBUG(dbgs() << "TailDuplicator deleted block: "
- << getBlockName(RemBB) << "\n");
- };
- auto RemovalCallbackRef =
- llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
-
- SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
- TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
- &DuplicatedPreds, &RemovalCallbackRef);
-
- // Update UnscheduledPredecessors to reflect tail-duplication.
- DuplicatedToLPred = false;
- for (MachineBasicBlock *Pred : DuplicatedPreds) {
- // We're only looking for unscheduled predecessors that match the filter.
- BlockChain* PredChain = BlockToChain[Pred];
- if (Pred == LPred)
- DuplicatedToLPred = true;
- if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
- || PredChain == &Chain)
- continue;
- for (MachineBasicBlock *NewSucc : Pred->successors()) {
- if (BlockFilter && !BlockFilter->count(NewSucc))
- continue;
- BlockChain *NewChain = BlockToChain[NewSucc];
- if (NewChain != &Chain && NewChain != PredChain)
- NewChain->UnscheduledPredecessors++;
- }
- }
- return Removed;
-}
-
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MDT = &getAnalysis<MachineDominatorTree>();
- if (TailDupPlacement) {
- unsigned TailDupSize = TailDuplicatePlacementThreshold;
- if (MF.getFunction()->optForSize())
- TailDupSize = 1;
- TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
- }
-
assert(BlockToChain.empty());
buildCFGChains();
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
- unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
+ // Default to the standard tail-merge-size option.
+ unsigned TailMergeSize = 0;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
- // Must redo the dominator tree if blocks were changed.
- MDT->runOnMachineFunction(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- Duplicator.initMF(MF, MBPI, /* LayoutMode */ false);
+ Duplicator.initMF(MF, MBPI);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
void TailDuplicator::initMF(MachineFunction &MFin,
const MachineBranchProbabilityInfo *MBPIin,
- bool LayoutModeIn, unsigned TailDupSizeIn) {
+ unsigned TailDupSizeIn) {
MF = &MFin;
TII = MF->getSubtarget().getInstrInfo();
TRI = MF->getSubtarget().getRegisterInfo();
assert(MBPI != nullptr && "Machine Branch Probability Info required");
- LayoutMode = LayoutModeIn;
PreRegAlloc = MRI->isSSA();
}
/// Tail duplicate the block and cleanup.
/// \p IsSimple - return value of isSimpleBB
/// \p MBB - block to be duplicated
-/// \p ForcedLayoutPred - If non-null, treat this block as the layout
-/// predecessor, instead of using the ordering in MF
/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
/// all Preds that received a copy of \p MBB.
-/// \p RemovalCallback - if non-null, called just before MBB is deleted.
bool TailDuplicator::tailDuplicateAndUpdate(
bool IsSimple, MachineBasicBlock *MBB,
- MachineBasicBlock *ForcedLayoutPred,
- SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
- llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+ SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds) {
// Save the successors list.
SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
MBB->succ_end());
SmallVector<MachineBasicBlock *, 8> TDBBs;
SmallVector<MachineInstr *, 16> Copies;
- if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies))
+ if (!tailDuplicate(IsSimple, MBB, TDBBs, Copies))
return false;
++NumTails;
// If it is dead, remove it.
if (isDead) {
NumTailDupRemoved += MBB->size();
- removeDeadBlock(MBB, RemovalCallback);
+ removeDeadBlock(MBB);
++NumDeadBlocks;
}
if (!shouldTailDuplicate(IsSimple, *MBB))
continue;
- MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
+ MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB);
}
if (PreRegAlloc && TailDupVerify)
/// Determine if it is profitable to duplicate this block.
bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
MachineBasicBlock &TailBB) {
- // When doing tail-duplication during layout, the block ordering is in flux,
- // so canFallThrough returns a result based on incorrect information and
- // should just be ignored.
- if (!LayoutMode && TailBB.canFallThrough())
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
return false;
// Don't try to tail-duplicate single-block loops.
bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB) {
- // EH edges are ignored by analyzeBranch.
+ // EH edges are ignored by AnalyzeBranch.
if (PredBB->succ_size() > 1)
return false;
/// If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
-/// \p IsSimple result of isSimpleBB
-/// \p TailBB Block to be duplicated.
-/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
-/// instead of the previous block in MF's order.
-/// \p TDBBs A vector to keep track of all blocks tail-duplicated
-/// into.
-/// \p Copies A vector of copy instructions inserted. Used later to
-/// walk all the inserted copies and remove redundant ones.
bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
- MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies) {
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
continue;
// Don't duplicate into a fall-through predecessor (at least for now).
- bool IsLayoutSuccessor = false;
- if (ForcedLayoutPred)
- IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
- else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
- IsLayoutSuccessor = true;
- if (IsLayoutSuccessor)
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
continue;
DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
// If TailBB was duplicated into all its predecessors except for the prior
// block, which falls through unconditionally, move the contents of this
// block into the prior block.
- MachineBasicBlock *PrevBB = ForcedLayoutPred;
- if (!PrevBB)
- PrevBB = &*std::prev(TailBB->getIterator());
+ MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
- // analyzeBranch.
+ // AnalyzeBranch.
if (PrevBB->succ_size() == 1 &&
// Layout preds are not always CFG preds. Check.
*PrevBB->succ_begin() == TailBB &&
!TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
- PriorCond.empty() &&
- (!PriorTBB || PriorTBB == TailBB) &&
- TailBB->pred_size() == 1 &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
<< "From MBB: " << *TailBB);
}
appendCopies(PrevBB, CopyInfos, Copies);
} else {
- TII->removeBranch(*PrevBB);
// No PHIs to worry about, just splice the instructions over.
PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
}
/// Remove the specified dead machine basic block from the function, updating
/// the CFG.
-void TailDuplicator::removeDeadBlock(
- MachineBasicBlock *MBB,
- llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
- if (RemovalCallback)
- (*RemovalCallback)(MBB);
-
// Remove all successors.
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end() - 1);
%tmp2 = load i16, i16* %ptr, align 2
br label %bb2
bb2:
+; CHECK: %bb2
; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff
; CHECK: cmp [[REG]], #23
%tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
-; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s
-; -tail-dup-placement causes tail duplication during layout. This breaks the
-; assumptions of the test case as written (specifically, it creates an
-; additional cmp instruction, creating a false positive), so we pass
-; -tail-dup-placement=0 to restore the original behavior
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
; marked as external to prevent possible optimizations
@a = external global i32
br label %bb2
bb2: ; preds = %bb1, %entry
+; CHECK: bb2
; CHECK: cmp [[REG]], #0
; CHECK: ble
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
-; RUN: llc -verify-machineinstrs < %s -march=ppc32 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc32 | \
+; RUN: grep "b LBB.*" | count 4
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.7.0"
-;CHECK-LABEL: foo:
-; There are 4 inner loops (%bb, %bb12, %bb25, %bb38) that all exit to %cond_next48
-; The last (whichever it is) should have a fallthrough exit, and the other three
-; need an unconditional branch. No other block should have an unconditional
-; branch to cond_next48
-; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy
-; of %cond_next48, so there should only be two unconditional branches.
-
-;CHECK: b LBB0_13
-;CHECK: b LBB0_13
-;CHECK-NOT: b LBB0_13
-;CHECK: LBB0_13: ; %cond_next48
-
define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
entry:
%tmp1 = and i32 %W, 1 ; <i32> [#uses=1]
; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill
; CHECK-DAG: std 1, 16([[REGA]])
; CHECK-DAG: std 2, 24([[REGA]])
-; CHECK: bcl 20, 31, .LBB1_3
+; CHECK: bcl 20, 31, .LBB1_5
; CHECK: li 3, 1
-; CHECK: #EH_SjLj_Setup .LBB1_3
+; CHECK: #EH_SjLj_Setup .LBB1_5
; CHECK: b .LBB1_1
-; CHECK: .LBB1_3:
-; CHECK: mflr [[REGL:[0-9]+]]
-; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
-; CHECK: std [[REGL]], 8([[REG2]])
-; CHECK: li 3, 0
-
-; CHECK: .LBB1_5:
+; CHECK: .LBB1_4:
; CHECK: lfd
; CHECK: lvx
; CHECK: ld
; CHECK: blr
+; CHECK: .LBB1_5:
+; CHECK: mflr [[REGL:[0-9]+]]
+; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
+; CHECK: std [[REGL]], 8([[REG2]])
+; CHECK: li 3, 0
+
; CHECK-NOAV: @main
; CHECK-NOAV-NOT: stvx
; CHECK-NOAV: bcl
+++ /dev/null
-; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-n32:64"
-target triple = "powerpc64le-grtev4-linux-gnu"
-
-; Intended layout:
-; The outlining flag produces the layout
-; test1
-; test2
-; test3
-; test4
-; exit
-; optional1
-; optional2
-; optional3
-; optional4
-; Tail duplication puts test n+1 at the end of optional n
-; so optional1 includes a copy of test2 at the end, and branches
-; to test3 (at the top) or falls through to optional 2.
-; The CHECK statements check for the whole string of tests and exit block,
-; and then check that the correct test has been duplicated into the end of
-; the optional blocks and that the optional blocks are in the correct order.
-;CHECK-LABEL: f:
-; test1 may have been merged with entry
-;CHECK: mr [[TAGREG:[0-9]+]], 3
-;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
-;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
-;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
-;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
-;CHECK: blr
-;CHECK-NEXT: [[OPT1LABEL]]
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
-;CHECK-NEXT: beq 0, [[TEST3LABEL]]
-;CHECK-NEXT: [[OPT2LABEL]]
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
-;CHECK-NEXT: beq 0, [[TEST4LABEL]]
-;CHECK-NEXT: [[OPT3LABEL]]
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
-;CHECK-NEXT: beq 0, [[EXITLABEL]]
-;CHECK-NEXT: [[OPT4LABEL]]
-;CHECK: b [[EXITLABEL]]
-
-define void @f(i32 %tag) {
-entry:
- br label %test1
-test1:
- %tagbit1 = and i32 %tag, 1
- %tagbit1eq0 = icmp eq i32 %tagbit1, 0
- br i1 %tagbit1eq0, label %test2, label %optional1
-optional1:
- call void @a()
- call void @a()
- call void @a()
- call void @a()
- br label %test2
-test2:
- %tagbit2 = and i32 %tag, 2
- %tagbit2eq0 = icmp eq i32 %tagbit2, 0
- br i1 %tagbit2eq0, label %test3, label %optional2
-optional2:
- call void @b()
- call void @b()
- call void @b()
- call void @b()
- br label %test3
-test3:
- %tagbit3 = and i32 %tag, 4
- %tagbit3eq0 = icmp eq i32 %tagbit3, 0
- br i1 %tagbit3eq0, label %test4, label %optional3
-optional3:
- call void @c()
- call void @c()
- call void @c()
- call void @c()
- br label %test4
-test4:
- %tagbit4 = and i32 %tag, 8
- %tagbit4eq0 = icmp eq i32 %tagbit4, 0
- br i1 %tagbit4eq0, label %exit, label %optional4
-optional4:
- call void @d()
- call void @d()
- call void @d()
- call void @d()
- br label %exit
-exit:
- ret void
-}
-
-declare void @a()
-declare void @b()
-declare void @c()
-declare void @d()
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false | FileCheck %s
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s
; Test the CFG stackifier pass.
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0| FileCheck %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
; Test memcpy, memmove, and memset intrinsics.
ret i32 %sum
}
-; Tail duplication during layout can entirely remove body0 by duplicating it
-; into the entry block and into body1. This is a good thing but it isn't what
-; this test is looking for. So to make the blocks longer so they don't get
-; duplicated, we add some calls to dummy.
-declare void @dummy()
-
define i32 @test_loop_rotate(i32 %i, i32* %a) {
; Check that we rotate conditional exits from the loop to the bottom of the
; loop, eliminating unconditional branches to the top.
%base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
%next = add i32 %iv, 1
%exitcond = icmp eq i32 %next, %i
- call void @dummy()
- call void @dummy()
br i1 %exitcond, label %exit, label %body1
body1:
; First rotated loop top.
; CHECK: .p2align
; CHECK: %while.end
-; %for.cond gets completely tail-duplicated away.
+; CHECK: %for.cond
; CHECK: %if.then
; CHECK: %if.else
; CHECK: %if.end10
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je [[LABEL_BB6:.*]]
; CHECK: movl %edi, %eax
-; CHECK-NEXT: retq
; CHECK: [[LABEL_BB6]]
; CHECK-NEXT: movl %esi, %edi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: retq
+; CHECK-NEXT: jmp
;
%cmp = icmp ne i32 %a, 0
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
; CHECK-LABEL: fmaddsubpd_loop_128:
; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2
-; CHECK: vmovapd %xmm2, %xmm0
+; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: fmsubaddpd_loop_128:
; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2
-; CHECK: vmovapd %xmm2, %xmm0
+; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: fmaddpd_loop_128:
; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2
-; CHECK: vmovapd %xmm2, %xmm0
+; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: fmsubpd_loop_128:
; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2
-; CHECK: vmovapd %xmm2, %xmm0
+; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: fnmaddpd_loop_128:
; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2
-; CHECK: vmovapd %xmm2, %xmm0
+; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: fnmsubpd_loop_128:
; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2
-; CHECK: vmovapd %xmm2, %xmm0
+; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: fmaddsubpd_loop_256:
; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2
-; CHECK: vmovapd %ymm2, %ymm0
+; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
; CHECK-LABEL: fmsubaddpd_loop_256:
; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2
-; CHECK: vmovapd %ymm2, %ymm0
+; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
; CHECK-LABEL: fmaddpd_loop_256:
; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2
-; CHECK: vmovapd %ymm2, %ymm0
+; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
; CHECK-LABEL: fmsubpd_loop_256:
; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2
-; CHECK: vmovapd %ymm2, %ymm0
+; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
; CHECK-LABEL: fnmaddpd_loop_256:
; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2
-; CHECK: vmovapd %ymm2, %ymm0
+; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
; CHECK-LABEL: fnmsubpd_loop_256:
; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2
-; CHECK: vmovapd %ymm2, %ymm0
+; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
; CHECK-NEXT: ucomisd %xmm1, %xmm0
; CHECK-NEXT: jne .LBB1_1
; CHECK-NEXT: jp .LBB1_1
-; CHECK-NEXT: # %bb2
+; CHECK-NEXT: .LBB1_2: # %bb2
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB1_1: # %bb1
; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: jmp .LBB1_2
entry:
%mul = fmul double %x, %y
br label %l1
}
-; It is correct for either l1 or l2 to be removed.
-; If l2 is removed, the message should be "Address of block that was removed by CodeGen"
-; If l1 is removed, it should be "Block address taken."
-; CHECK: .Ltmp0: # {{Address of block that was removed by CodeGen|Block address taken}}
+; CHECK: .Ltmp0: # Address of block that was removed by CodeGen
; CHECK: .quad .Ltmp0
; This testing case is reduced from 197.parser prune_match function.
; We make sure register copies are not generated on isupper.exit blocks.
-; isupper.exit and isupper.exit223 get tail-duplicated into all their
-; predecessors.
-; CHECK: cond.true.i.i
+; CHECK: isupper.exit
; CHECK-NEXT: in Loop
-; Mem-move
-; CHECK-NEXT: movl
-; CHECK-NEXT: andl
; CHECK-NEXT: testl
; CHECK-NEXT: jne
-; CHECK: cond.true.i.i217
+; CHECK: isupper.exit
; CHECK-NEXT: in Loop
-; Mem-move
-; CHECK-NEXT: movl
-; CHECK-NEXT: andl
; CHECK-NEXT: testl
; CHECK-NEXT: je
-; CHECK: cond.false.i.i
; CHECK: maskrune
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: testl
-; CHECK-NEXT: je
-; CHECK: cond.false.i.i219
; CHECK: maskrune
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: testl
-; CHECK-NEXT: jne
%struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* }
%struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* }
; X32-NEXT: je .LBB1_1
; X32-NEXT: # BB#2: # %entry
; X32-NEXT: xorps %xmm1, %xmm1
-; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X32-NEXT: jne .LBB1_5
-; X32-NEXT: jmp .LBB1_4
+; X32-NEXT: jmp .LBB1_3
; X32-NEXT: .LBB1_1:
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: .LBB1_3: # %entry
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X32-NEXT: je .LBB1_4
-; X32-NEXT: .LBB1_5: # %entry
+; X32-NEXT: # BB#5: # %entry
; X32-NEXT: xorps %xmm2, %xmm2
-; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X32-NEXT: jne .LBB1_8
-; X32-NEXT: jmp .LBB1_7
+; X32-NEXT: jmp .LBB1_6
; X32-NEXT: .LBB1_4:
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: .LBB1_6: # %entry
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X32-NEXT: je .LBB1_7
-; X32-NEXT: .LBB1_8: # %entry
+; X32-NEXT: # BB#8: # %entry
; X32-NEXT: xorps %xmm3, %xmm3
; X32-NEXT: jmp .LBB1_9
; X32-NEXT: .LBB1_7:
; X64-NEXT: je .LBB1_1
; X64-NEXT: # BB#2: # %entry
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: jne .LBB1_5
-; X64-NEXT: jmp .LBB1_4
+; X64-NEXT: jmp .LBB1_3
; X64-NEXT: .LBB1_1:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB1_3: # %entry
; X64-NEXT: testl %edx, %edx
; X64-NEXT: je .LBB1_4
-; X64-NEXT: .LBB1_5: # %entry
+; X64-NEXT: # BB#5: # %entry
; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: testl %r8d, %r8d
-; X64-NEXT: jne .LBB1_8
-; X64-NEXT: jmp .LBB1_7
+; X64-NEXT: jmp .LBB1_6
; X64-NEXT: .LBB1_4:
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT: .LBB1_6: # %entry
; X64-NEXT: testl %r8d, %r8d
; X64-NEXT: je .LBB1_7
-; X64-NEXT: .LBB1_8: # %entry
+; X64-NEXT: # BB#8: # %entry
; X64-NEXT: xorps %xmm3, %xmm3
; X64-NEXT: jmp .LBB1_9
; X64-NEXT: .LBB1_7:
+++ /dev/null
-; RUN: llc -O2 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: uwtable
-; When tail-duplicating during placement, we work backward from blocks with
-; multiple successors. In this case, the block dup1 gets duplicated into dup2
-; and if.then64, and then the block dup2 gets duplicated into land.lhs.true
-; and if.end70
-; CHECK-LABEL: repeated_tail_dup:
-define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6) #0 align 2 {
-entry:
- br label %for.cond
-
-; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %for.cond
-for.cond: ; preds = %dup1, %entry
- br i1 %a1, label %land.lhs.true, label %if.end56
-
-land.lhs.true: ; preds = %for.cond
- store i32 10, i32* %a4, align 8
- br label %dup2
-
-if.end56: ; preds = %for.cond
- br i1 %a2, label %if.then64, label %if.end70
-
-if.then64: ; preds = %if.end56
- store i8 1, i8* %a6, align 1
- br label %dup1
-
-; CHECK: # %if.end70
-; CHECK-NEXT: # in Loop:
-; CHECK-NEXT: movl $12, (%rdx)
-; CHECK-NEXT: movl $2, (%rcx)
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .[[HEADER]]
-if.end70: ; preds = %if.end56
- store i32 12, i32* %a4, align 8
- br label %dup2
-
-dup2: ; preds = %if.end70, %land.lhs.true
- store i32 2, i32* %a5, align 4
- br label %dup1
-
-dup1: ; preds = %dup2, %if.then64
- %val = load i32, i32* %a4, align 8
- %switch = icmp ult i32 undef, 1
- br i1 %switch, label %for.cond, label %for.end
-
-for.end: ; preds = %dup1
- ret void
-}
-
-attributes #0 = { uwtable }
@a = external global i16
@b = external global i32
- declare void @dummy1()
- declare void @dummy2()
- declare void @dummy3()
-
; Function Attrs: nounwind
define void @f2() {
br i1 undef, label %bb1, label %bb3
bb1:
- call void @dummy1()
- call void @dummy1()
- call void @dummy1()
br i1 undef, label %bb2, label %bb2
bb2:
- call void @dummy2()
- call void @dummy2()
- call void @dummy2()
br label %bb4
bb3:
- call void @dummy3()
- call void @dummy3()
- call void @dummy3()
br label %bb2
bb4:
bb.1:
successors: %bb.2(100)
- CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
- CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
- CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
JNE_1 %bb.2, implicit %eflags
bb.2:
successors: %bb.4(100)
- CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
- CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
- CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
JMP_1 %bb.4
bb.3:
successors: %bb.2(100)
- CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
- CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
- CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
JMP_1 %bb.2
bb.4: