/// This pass adds dead/undef flags after analyzing subregister lanes.
extern char &DetectDeadLanesID;
+ /// This pass performs post-RA machine sinking for COPY instructions.
+ extern char &PostRAMachineSinkingID;
+
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
/// even if it has glue.
virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; }
+ /// Remember what registers the specified instruction uses and modifies.
+ virtual void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs,
+ BitVector &UsedRegs,
+ const TargetRegisterInfo *TRI) const;
+
protected:
/// Target-dependent implementation for foldMemoryOperand.
/// Target-independent code in foldMemoryOperand will
void initializePostMachineSchedulerPass(PassRegistry&);
void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
void initializePostRAHazardRecognizerPass(PassRegistry&);
+void initializePostRAMachineSinkingPass(PassRegistry&);
void initializePostRASchedulerPass(PassRegistry&);
void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry&);
void initializePredicateInfoPrinterLegacyPassPass(PassRegistry&);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRAHazardRecognizerPass(Registry);
+ initializePostRAMachineSinkingPass(Registry);
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
namespace {
return true;
}
+
+//===----------------------------------------------------------------------===//
+// This pass is not intended to be a replacement or a complete alternative
+// for the pre-ra machine sink pass. It is only designed to sink COPY
+// instructions which should be handled after RA.
+//
+// This pass sinks COPY instructions into a successor block, if the COPY is not
+// used in the current block and the COPY is live-in to a single successor
+// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
+// copy on paths where their results aren't needed. This also exposes
+// additional opportunities for dead copy elimination and shrink wrapping.
+//
+// These copies were either not handled by or are inserted after the MachineSink
+// pass. As an example of the former case, the MachineSink pass cannot sink
+// COPY instructions with allocatable source registers; for AArch64 this type
+// of copy instruction is frequently used to move function parameters (PhysReg)
+// into virtual registers in the entry block.
+//
+// For the machine IR below, this pass will sink %w19 in the entry into its
+// successor (%bb.1) because %w19 is only live-in in %bb.1.
+// %bb.0:
+// %wzr = SUBSWri %w1, 1
+// %w19 = COPY %w0
+// Bcc 11, %bb.2
+// %bb.1:
+// Live Ins: %w19
+// BL @fun
+// %w0 = ADDWrr %w0, %w19
+// RET %w0
+// %bb.2:
+// %w0 = COPY %wzr
+// RET %w0
+// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
+// able to see %bb.0 as a candidate.
+//===----------------------------------------------------------------------===//
+namespace {
+
+/// Machine function pass that sinks COPY instructions into a successor block
+/// when the copied value is unused in the defining block. Runs after register
+/// allocation, so it operates on physical registers and block live-in lists.
+class PostRAMachineSinking : public MachineFunctionPass {
+public:
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Pass identification, used for pass registration.
+  static char ID;
+  PostRAMachineSinking() : MachineFunctionPass(ID) {}
+  StringRef getPassName() const override { return "PostRA Machine Sink"; }
+
+private:
+  /// Track which registers have been modified and used.
+  /// Sized to the number of physical registers in runOnMachineFunction and
+  /// reset per-block in tryToSinkCopy.
+  BitVector ModifiedRegs, UsedRegs;
+
+  /// Sink Copy instructions unused in the same block close to their uses in
+  /// successors. Returns true if any copy was moved.
+  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
+                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
+};
+} // namespace
+
+char PostRAMachineSinking::ID = 0;
+char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
+
+INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
+ "PostRA Machine Sink", false, false)
+
+/// Return the unique block in \p SinkableBBs in which \p Reg is live-in, or
+/// null when no such unique successor exists. Also returns null when any
+/// register aliasing \p Reg is live-in to one of \p CurBB's other successors,
+/// since sinking a def of \p Reg past the branch would then clobber a value
+/// that is still live on another path.
+static MachineBasicBlock *
+getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
+                      ArrayRef<MachineBasicBlock *> SinkableBBs, unsigned Reg,
+                      const TargetRegisterInfo *TRI) {
+  // Collect Reg and every register aliasing it (the iterator's third argument
+  // includes Reg itself).
+  SmallSet<unsigned, 8> AliasedRegs;
+  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+    AliasedRegs.insert(*AI);
+
+  // Try to find a single sinkable successor in which Reg is live-in.
+  MachineBasicBlock *BB = nullptr;
+  for (auto *SI : SinkableBBs) {
+    if (SI->isLiveIn(Reg)) {
+      // If BB is set here, Reg is live-in to at least two sinkable successors,
+      // so quit.
+      if (BB)
+        return nullptr;
+      BB = SI;
+    }
+  }
+  // Reg is not live-in to any sinkable successors.
+  if (!BB)
+    return nullptr;
+
+  // Check if any register aliased with Reg is live-in in other successors.
+  for (auto *SI : CurBB.successors()) {
+    if (SI == BB)
+      continue;
+    for (const auto LI : SI->liveins())
+      if (AliasedRegs.count(LI.PhysReg))
+        return nullptr;
+  }
+  return BB;
+}
+
+/// Walk \p CurBB bottom-up and sink eligible COPY instructions into the
+/// single successor where their destination register is live-in. Returns
+/// true if any copy was moved. Uses the pass-level ModifiedRegs/UsedRegs
+/// bitvectors to track registers touched between each candidate COPY and the
+/// end of the block.
+bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
+                                         MachineFunction &MF,
+                                         const TargetRegisterInfo *TRI,
+                                         const TargetInstrInfo *TII) {
+  SmallVector<MachineBasicBlock *, 2> SinkableBBs;
+  // FIXME: For now, we sink only to a successor which has a single predecessor
+  // so that we can directly sink COPY instructions to the successor without
+  // adding any new block or branch instruction.
+  for (MachineBasicBlock *SI : CurBB.successors())
+    if (!SI->livein_empty() && SI->pred_size() == 1)
+      SinkableBBs.push_back(SI);
+
+  if (SinkableBBs.empty())
+    return false;
+
+  bool Changed = false;
+
+  // Track which registers have been modified and used between the end of the
+  // block and the current instruction.
+  ModifiedRegs.reset();
+  UsedRegs.reset();
+
+  // Iterate in reverse; advance the iterator before any splice so it stays
+  // valid when MI is moved out of this block.
+  for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
+    MachineInstr *MI = &*I;
+    ++I;
+
+    // Do not move any instruction across function call.
+    if (MI->isCall())
+      return false;
+
+    // Only renamable COPY destinations are candidates: a non-renamable def
+    // carries constraints (e.g. ABI/inline-asm) that forbid moving it.
+    if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
+      TII->trackRegDefsUses(*MI, ModifiedRegs, UsedRegs, TRI);
+      continue;
+    }
+
+    unsigned DefReg = MI->getOperand(0).getReg();
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    // Don't sink the COPY if it would violate a register dependency.
+    // (trackRegDefsUses sets the bits of all aliases, so a plain index test
+    // here also catches aliasing defs/uses.)
+    if (ModifiedRegs[DefReg] || ModifiedRegs[SrcReg] || UsedRegs[DefReg]) {
+      TII->trackRegDefsUses(*MI, ModifiedRegs, UsedRegs, TRI);
+      continue;
+    }
+
+    MachineBasicBlock *SuccBB =
+        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
+    // Don't sink if we cannot find a single sinkable successor in which Reg
+    // is live-in.
+    if (!SuccBB) {
+      TII->trackRegDefsUses(*MI, ModifiedRegs, UsedRegs, TRI);
+      continue;
+    }
+    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
+           "Unexpected predecessor");
+
+    // Clear the kill flag if SrcReg is killed between MI and the end of the
+    // block. The kill moves onto the COPY's source operand, which becomes
+    // the last use of SrcReg in this block once the COPY is sunk.
+    if (UsedRegs[SrcReg]) {
+      MachineBasicBlock::iterator NI = std::next(MI->getIterator());
+      for (MachineInstr &UI : make_range(NI, CurBB.end())) {
+        if (UI.killsRegister(SrcReg, TRI)) {
+          UI.clearRegisterKills(SrcReg, TRI);
+          MI->getOperand(1).setIsKill(true);
+          break;
+        }
+      }
+    }
+
+    // Move the COPY and update SuccBB's live-in list: DefReg is now defined
+    // inside SuccBB, while SrcReg must be live into it.
+    MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
+    SuccBB->splice(InsertPos, &CurBB, MI);
+    SuccBB->removeLiveIn(DefReg);
+    if (!SuccBB->isLiveIn(SrcReg))
+      SuccBB->addLiveIn(SrcReg);
+
+    Changed = true;
+    ++NumPostRACopySink;
+  }
+  return Changed;
+}
+
+/// Pass entry point: size the register-tracking bitvectors to the target's
+/// physical register count, then attempt copy sinking in every block.
+bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  // One bit per physical register; tryToSinkCopy resets them per block.
+  ModifiedRegs.resize(TRI->getNumRegs());
+  UsedRegs.resize(TRI->getNumRegs());
+
+  for (auto &BB : MF)
+    Changed |= tryToSinkCopy(BB, MF, TRI, TII);
+
+  return Changed;
+}
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
}
+/// Record in \p ModifiedRegs and \p UsedRegs (bitvectors indexed by register
+/// number) every register that \p MI defines or uses, including all aliases
+/// of each register. Register-mask operands mark every register not
+/// preserved by the mask as modified.
+void TargetInstrInfo::trackRegDefsUses(const MachineInstr &MI,
+                                       BitVector &ModifiedRegs,
+                                       BitVector &UsedRegs,
+                                       const TargetRegisterInfo *TRI) const {
+  for (const MachineOperand &MO : MI.operands()) {
+    // A regmask clobbers every register whose bit is clear in the mask.
+    if (MO.isRegMask())
+      ModifiedRegs.setBitsNotInMask(MO.getRegMask());
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef()) {
+      // Some architectures (e.g. AArch64 XZR/WZR) have registers that are
+      // constant and may be used as destinations to indicate the generated
+      // value is discarded. No need to track such case as a def.
+      if (!TRI->isConstantPhysReg(Reg))
+        for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+          ModifiedRegs.set(*AI);
+    } else {
+      assert(MO.isUse() && "Reg operand not a def and not a use");
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        UsedRegs.set(*AI);
+    }
+  }
+}
+
bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
const MachineInstr &MI, AliasAnalysis *AA) const {
const MachineFunction &MF = *MI.getMF();
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisablePostRAMachineSink("disable-postra-machine-sink",
+ cl::Hidden,
+ cl::desc("Disable PostRA Machine Sinking"));
static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
cl::desc("Disable Loop Strength Reduction Pass"));
static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting",
if (StandardID == &MachineSinkingID)
return applyDisable(TargetID, DisableMachineSink);
+ if (StandardID == &PostRAMachineSinkingID)
+ return applyDisable(TargetID, DisablePostRAMachineSink);
+
if (StandardID == &MachineCopyPropagationID)
return applyDisable(TargetID, DisableCopyProp);
addPostRegAlloc();
// Insert prolog/epilog code. Eliminate abstract frame index references...
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(&PostRAMachineSinkingID);
addPass(&ShrinkWrapID);
+ }
// Prolog/Epilog inserter needs a TargetMachine to instantiate. But only
// do so if it hasn't been disabled, substituted, or overridden.
return NextI;
}
-/// trackRegDefsUses - Remember what registers the specified instruction uses
-/// and modifies.
-static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs,
- BitVector &UsedRegs,
- const TargetRegisterInfo *TRI) {
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isRegMask())
- ModifiedRegs.setBitsNotInMask(MO.getRegMask());
-
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (MO.isDef()) {
- // WZR/XZR are not modified even when used as a destination register.
- if (Reg != AArch64::WZR && Reg != AArch64::XZR)
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- ModifiedRegs.set(*AI);
- } else {
- assert(MO.isUse() && "Reg operand not a def and not a use?!?");
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UsedRegs.set(*AI);
- }
- }
-}
-
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
return false;
// Update modified / uses register lists.
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
// Otherwise, if the base register is modified, we have no match, so
// return early.
// If the unscaled offset isn't a multiple of the MemSize, we can't
// pair the operations together: bail and keep looking.
if (MIOffset % MemSize) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(&MI);
continue;
}
// the stored value is the same (i.e., WZR).
if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
(IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(&MI);
continue;
}
// immediate offset of merging these instructions is out of range for
// a pairwise instruction, bail and keep looking.
if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(&MI);
continue;
}
// can't express the offset of the unscaled input, bail and keep
// looking.
if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(&MI);
continue;
}
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(&MI);
continue;
}
return E;
// Update modified / uses register lists.
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
// Otherwise, if the base register is modified, we have no match, so
// return early.
return MBBI;
// Update the status of what the instruction clobbered and used.
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
// Otherwise, if the base register is used or modified, we have no match, so
// return early.
return MBBI;
// Update the status of what the instruction clobbered and used.
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
// Otherwise, if the base register is used or modified, we have no match, so
// return early.
--- /dev/null
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+# Sink w19 to %bb.1.
+# CHECK-LABEL: name: sinkcopy1
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: $w19 = COPY killed $w0
+# CHECK-LABEL: bb.1:
+# CHECK: liveins: $w1, $w0
+# CHECK: renamable $w19 = COPY killed $w0
+
+name: sinkcopy1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY killed $w0
+ Bcc 11, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.1:
+ liveins: $w1, $w19
+ $w0 = ADDWrr $w1, $w19
+ RET $x0
+
+ bb.2:
+ $w0 = COPY $wzr
+ RET $x0
+...
+
+---
+# Sink w19 to %bb.2.
+# CHECK-LABEL: name: sinkcopy2
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: renamable $w19 = COPY killed $w0
+# CHECK-LABEL: bb.2:
+# CHECK: liveins: $w1, $w0
+# CHECK: renamable $w19 = COPY killed $w0
+name: sinkcopy2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY killed $w0
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ $w0 = COPY $wzr
+ RET $x0
+
+ bb.2:
+ liveins: $w1, $w19
+ $w0 = ADDWrr $w1, $w19
+ RET $x0
+...
+
+---
+# Sink w19 and w20 to %bb.1.
+# CHECK-LABEL: name: sinkcopy3
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: renamable $w19 = COPY killed $w0
+# CHECK-LABEL: bb.1:
+# CHECK: liveins: $w1, $w0
+# CHECK: renamable $w19 = COPY killed $w0
+# CHECK: renamable $w20 = COPY killed $w1
+name: sinkcopy3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY killed $w0
+ renamable $w20 = COPY killed $w1
+
+ bb.1:
+ liveins: $w19, $w20
+ $w0 = COPY $w19
+ $w1 = COPY $w20
+ RET $x0
+...
+
+
+# Sink w19 to %bb.1 and w20 to %bb.2.
+# CHECK-LABEL: name: sinkcopy4
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: renamable $w19 = COPY killed $w0
+# CHECK-NOT: renamable $w20 = COPY killed $w1
+# CHECK-LABEL: bb.1:
+# CHECK: liveins: $w1, $w0
+# CHECK: renamable $w19 = COPY killed $w0
+# CHECK-LABEL: bb.2:
+# CHECK: liveins: $w0, $w1
+# CHECK: renamable $w20 = COPY killed $w1
+name: sinkcopy4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY killed $w0
+ renamable $w20 = COPY killed $w1
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ liveins: $w1, $w19
+ $w0 = ADDWrr $w1, $w19
+ RET $x0
+
+ bb.2:
+ liveins: $w0, $w20
+ $w0 = ADDWrr $w0, $w20
+ RET $x0
+...
+
+# Sink w19 to %bb.3 through %bb.2.
+# CHECK-LABEL: name: sinkcopy5
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: renamable $w19 = COPY $w0
+# CHECK-LABEL: bb.2:
+# CHECK: $w1 = ADDWrr $w1, $w0
+# CHECK-LABEL: bb.3:
+# CHECK: liveins: $w1, $w0
+# CHECK: renamable $w19 = COPY killed $w0
+name: sinkcopy5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY $w0
+ Bcc 11, %bb.2, implicit $nzcv
+
+ bb.1:
+ liveins: $x0
+ $w19 = COPY $wzr
+ RET $x0
+
+ bb.2:
+ liveins: $w0, $w1, $w19
+ $w1 = ADDWrr $w1, killed $w0
+
+ bb.3:
+ liveins: $w1, $w19
+ $w0 = ADDWrr $w1, $w19
+ RET $x0
+...
+
+# Sink w19 and w20 to %bb.2.
+# CHECK-LABEL: name: sinkcopy6
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: renamable $w19 = COPY $w0
+# CHECK-NOT: renamable $w20 = COPY $w0
+# CHECK-LABEL: bb.2:
+# CHECK: liveins: $w1, $w0
+# CHECK: renamable $w19 = COPY $w0
+# CHECK: renamable $w20 = COPY $w19
+name: sinkcopy6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY $w0
+ renamable $w20 = COPY $w19
+ Bcc 11, %bb.2, implicit $nzcv
+
+ bb.1:
+ $w0 = COPY $wzr
+ RET $x0
+
+ bb.2:
+ liveins: $w1, $w20
+ $w0 = ADDWrr killed $w1, $w20
+ RET $x0
+...
+
+---
+# Sink w19 regardless of the def of wzr in bb.0.
+# CHECK-LABEL: name: sinkcopy7
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: renamable $w19 = COPY $w0
+# CHECK-LABEL: bb.2:
+# CHECK: renamable $w19 = COPY $wzr
+name: sinkcopy7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ renamable $w19 = COPY $wzr
+ $wzr = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ $x0 = COPY $xzr
+ RET $x0
+
+ bb.2:
+ liveins: $w0, $w19
+ $w0 = ADDWrr $w0, $w19
+ RET $x0
+---
+
+# Don't sink w19 as w0 is defined in bb.0.
+# CHECK-LABEL: name: donotsinkcopy1
+# CHECK-LABEL: bb.0:
+# CHECK: renamable $w19 = COPY $w0
+# CHECK: $w0 = LDRWui $sp, 0
+name: donotsinkcopy1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY $w0
+ $w0 = LDRWui $sp, 0 :: (load 4)
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ $x0 = COPY $xzr
+ RET $x0
+
+ bb.2:
+ liveins: $w0, $w19
+ $w0 = ADDWrr $w0, $w19
+ RET $x0
+...
+
+---
+# Don't sink w19 as w19 is used in bb.0.
+# CHECK-LABEL: name: donotsinkcopy2
+# CHECK-LABEL: bb.0:
+# CHECK: renamable $w19 = COPY $w0
+# CHECK: STRWui $w1, $x19, 0
+name: donotsinkcopy2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY $w0
+ STRWui $w1, $x19, 0 :: (store 4)
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ $x0 = COPY $xzr
+ RET $x0
+
+ bb.2:
+ liveins: $w0, $w19
+ $w0 = ADDWrr $w0, $w19
+ RET $x0
+...
+
+---
+# Don't sink w19 as w19 is used in both %bb.1 and %bb.2.
+# CHECK-LABEL: name: donotsinkcopy3
+# CHECK-LABEL: bb.0:
+# CHECK: renamable $w19 = COPY $w0
+name: donotsinkcopy3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY $w0
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ liveins: $w19
+ $w0 = COPY $w19
+ RET $x0
+
+ bb.2:
+ liveins: $w0, $w19
+ $w0 = ADDWrr $w0, $w19
+ RET $x0
+...
+
+---
+# Don't sink w19 as %bb.2 has multiple predecessors.
+# CHECK-LABEL: name: donotsinkcopy4
+# CHECK-LABEL: bb.0:
+# CHECK: renamable $w19 = COPY $w0
+name: donotsinkcopy4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY $w0
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ liveins: $w0
+ $w19 = COPY $w0
+ B %bb.2
+
+ bb.2:
+ liveins: $w0, $w19
+ $w0 = ADDWrr $w0, $w19
+ RET $x0
+...
+
+
+# Don't sink w19 after sinking w20.
+# CHECK-LABEL: name: donotsinkcopy5
+# CHECK-LABEL: bb.0:
+# CHECK: renamable $w19 = COPY $w0
+# CHECK-LABEL: bb.2:
+# CHECK: liveins: $w0, $w19
+# CHECK: renamable $w20 = COPY $w19
+name: donotsinkcopy5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $w19 = COPY $w0
+ renamable $w20 = COPY $w19
+ Bcc 11, %bb.2, implicit $nzcv
+
+ bb.1:
+ liveins: $w19
+ $w0 = COPY $w19
+ RET $x0
+
+ bb.2:
+ liveins: $w0, $w20
+ $w0 = ADDWrr killed $w0, $w20
+ RET $x0
+...
+
+---
+# Don't sink w19 as x19 is live-in in %bb.2.
+# CHECK-LABEL: name: donotsinkcopy6
+# CHECK-LABEL: bb.0:
+name: donotsinkcopy6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $w1
+ $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv
+ renamable $x19 = COPY $x0
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ liveins: $w19
+ $w0 = COPY $w19
+ RET $x0
+
+ bb.2:
+ liveins: $x0, $x19
+ $x0 = ADDXrr $x0, $x19
+ RET $x0
+...
--- /dev/null
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+; CHECK-LABEL: %bb.0:
+; CHECK-NOT: stp
+; CHECK-NOT: mov w{{[0-9]+}}, w0
+; CHECK-LABEL: %bb.1:
+; CHECK: stp x19
+; CHECK: mov w{{[0-9]+}}, w0
+
+define i32 @shrinkwrapme(i32 %paramAcrossCall, i32 %paramNotAcrossCall) {
+entry:
+ %cmp5 = icmp sgt i32 %paramNotAcrossCall, 0
+ br i1 %cmp5, label %CallBB, label %Exit
+CallBB:
+ %call = call i32 @fun()
+ %add = add i32 %call, %paramAcrossCall
+ ret i32 %add
+Exit:
+ ret i32 0
+}
+
+declare i32 @fun()
; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; XFAIL: *
+; This test is failing after post-ra machine sinking.
+;
; Check that no epilogue is inserted after a noreturn call.
;
; CHECK-LABEL: f1:
; RUN: llc -march=hexagon -enable-pipeliner -enable-bsb-sched=0 -join-liveintervals=false < %s | FileCheck %s
+; XFAIL: *
+; This test is failing after post-ra machine sinking.
+
; Test that we generate the correct Phi values when there is a Phi that
; references another Phi. We need to examine the other Phi to get the
; correct value. We need to do this even if we haven't generated the
}
; CHECK-LABEL: diamond1:
-; CHECK: ite eq
+; CHECK: itee eq
; CHECK: ldreq
; CHECK: strne
define i32 @diamond1(i32 %n, i32* %p) {
; CHECK-NOBP: ldreq
; CHECK-NOBP: strne
; CHECK-NOBP: strne
-define i32 @diamond2(i32 %n, i32 %m, i32* %p, i32* %q) {
+define i32 @diamond2(i32 %n, i32* %p, i32* %q) {
entry:
%tobool = icmp eq i32 %n, 0
br i1 %tobool, label %if.else, label %if.then
br label %if.end
if.else:
- store i32 %m, i32* %q, align 4
+ store i32 %n, i32* %q, align 4
%0 = load i32, i32* %p, align 4
br label %if.end
; CHECK-NOT: # %for.body
; CHECK: .loc 1 6 3
; CHECK-NEXT: je [[BB:.LBB[^ ]+]]
-; CHECK: [[BB]]:{{.}}# %for.end
+; CHECK: [[BB]]:
+; CHECK: xorl %ebp, %ebp
+; CHECK-NEXT: .LBB{{.*}} # %for.end
target triple = "x86_64-unknown-linux-gnu"
;
; X64-NOBMI-LABEL: mul1:
; X64-NOBMI: # %bb.0: # %entry
-; X64-NOBMI-NEXT: movq %rcx, %r8
-; X64-NOBMI-NEXT: movq %rdx, %r9
; X64-NOBMI-NEXT: testq %rdi, %rdi
; X64-NOBMI-NEXT: je .LBB1_3
; X64-NOBMI-NEXT: # %bb.1: # %for.body.preheader
+; X64-NOBMI-NEXT: movq %rcx, %r8
+; X64-NOBMI-NEXT: movq %rdx, %r9
; X64-NOBMI-NEXT: xorl %r10d, %r10d
; X64-NOBMI-NEXT: xorl %ecx, %ecx
; X64-NOBMI-NEXT: .p2align 4, 0x90
;
; X64-BMI-LABEL: mul1:
; X64-BMI: # %bb.0: # %entry
-; X64-BMI-NEXT: movq %rcx, %r8
-; X64-BMI-NEXT: movq %rdx, %r9
; X64-BMI-NEXT: testq %rdi, %rdi
; X64-BMI-NEXT: je .LBB1_3
; X64-BMI-NEXT: # %bb.1: # %for.body.preheader
+; X64-BMI-NEXT: movq %rcx, %r8
+; X64-BMI-NEXT: movq %rdx, %r9
; X64-BMI-NEXT: xorl %r10d, %r10d
; X64-BMI-NEXT: xorl %eax, %eax
; X64-BMI-NEXT: .p2align 4, 0x90
define i32 @t1(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: t1:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je LBB0_1
+; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
+; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB0_2: ## %while.body
+; CHECK-NEXT: LBB0_3: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: jne LBB0_2
-; CHECK-NEXT: ## %bb.3: ## %while.end
+; CHECK-NEXT: jne LBB0_3
+; CHECK-NEXT: ## %bb.4: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: LBB0_1:
define i32 @t3(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: t3:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB2_1
+; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
+; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB2_2: ## %while.body
+; CHECK-NEXT: LBB2_3: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: cqto
; CHECK-NEXT: idivq %rcx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: movq %rcx, %rax
-; CHECK-NEXT: jne LBB2_2
-; CHECK-NEXT: ## %bb.3: ## %while.end
+; CHECK-NEXT: jne LBB2_3
+; CHECK-NEXT: ## %bb.4: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: LBB2_1:
define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK-LABEL: test_int_div:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl %edx, %r9d
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle .LBB12_3
; CHECK-NEXT: # %bb.1: # %bb.nph
+; CHECK-NEXT: movl %edx, %r9d
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB12_2: # %for.body
; with the Orders insertion point vector.
; CHECK-LABEL: f: # @f
-; CHECK: .LBB0_1: # %while.body
+; CHECK: .LBB0_2: # %while.body
; CHECK: movl $32, %ecx
; CHECK: testl {{.*}}
-; CHECK: jne .LBB0_3
-; CHECK: # %bb.2: # %if.then
+; CHECK: jne .LBB0_4
+; CHECK: # %bb.3: # %if.then
; CHECK: callq if_then
; CHECK: movl %eax, %ecx
-; CHECK: .LBB0_3: # %if.end
+; CHECK: .LBB0_4: # %if.end
; Check that this DEBUG_VALUE comes before the left shift.
; CHECK: #DEBUG_VALUE: bit_offset <- $ecx
; CHECK: .cv_loc 0 1 8 28 # t.c:8:28