return true;
}
+/// Determine whether placing \p MI at position \p End of \p BB would conflict
+/// with a target-defined block prologue instruction located before \p End.
+///
+/// Only instructions in the range [BB->getFirstNonPHI(), End) for which
+/// TargetInstrInfo::isBasicBlockPrologue() returns true are examined. A
+/// conflict is reported when such an instruction
+///  - modifies a register that \p MI uses (ignorable uses and, when \p MRI is
+///    non-null, constant physical registers are exempt), or
+///  - reads a register that \p MI defines, or
+///  - carries a non-dead def matching a register that \p MI defines.
+///
+/// \param MRI may be null; in that case no physical register use is exempted
+///        on the grounds of being constant.
+/// \returns true if some prologue instruction interferes, false otherwise.
+static bool blockPrologueInterferes(MachineBasicBlock *BB,
+                                    MachineBasicBlock::iterator End,
+                                    MachineInstr &MI,
+                                    const TargetRegisterInfo *TRI,
+                                    const TargetInstrInfo *TII,
+                                    const MachineRegisterInfo *MRI) {
+  // An insertion point at the very start of the block has nothing before it,
+  // hence no prologue to inspect.
+  if (End == BB->begin())
+    return false;
+
+  // Reports whether operand Op of the sink candidate clashes with the
+  // prologue instruction Prologue. Non-register and null-register operands
+  // never clash.
+  auto ClashesWith = [&](const MachineOperand &Op, MachineInstr &Prologue) {
+    if (!Op.isReg())
+      return false;
+    Register R = Op.getReg();
+    if (!R)
+      return false;
+
+    if (!Op.isUse()) {
+      // The candidate defines R: the prologue must neither read it nor hold
+      // a non-dead definition of it.
+      if (Prologue.readsRegister(R, TRI))
+        return true;
+      auto *PrologueDef = Prologue.findRegisterDefOperand(R, false, true, TRI);
+      return PrologueDef && !PrologueDef->isDead();
+    }
+
+    // The candidate uses R: physical registers that are ignorable or
+    // constant are exempt; anything else must not be modified by the
+    // prologue.
+    bool Exempt =
+        Register::isPhysicalRegister(R) &&
+        (TII->isIgnorableUse(Op) || (MRI && MRI->isConstantPhysReg(R)));
+    return !Exempt && Prologue.modifiesRegister(R, TRI);
+  };
+
+  for (MachineBasicBlock::iterator It = BB->getFirstNonPHI(); It != End;
+       ++It) {
+    // Instructions the target does not classify as prologue are irrelevant.
+    if (TII->isBasicBlockPrologue(*It)) {
+      for (const MachineOperand &Op : MI.operands())
+        if (ClashesWith(Op, *It))
+          return true;
+    }
+  }
+  return false;
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// Determine where to insert into. Skip phi nodes.
MachineBasicBlock::iterator InsertPos =
SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
+ LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
+ return false;
+ }
// Collect debug users of any vreg that this inst defines.
SmallVector<MIRegs, 4> DbgUsersToSink;
}
auto DbgValsToSink = DbgValsToSinkMap.takeVector();
+ LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
+
+ MachineBasicBlock::iterator InsertPos =
+ SuccBB->SkipPHIsAndLabels(SuccBB->begin());
+ if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
+ LLVM_DEBUG(
+ dbgs() << " *** Not sinking: prologue interference\n");
+ continue;
+ }
+
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
- MachineBasicBlock::iterator InsertPos =
- SuccBB->SkipPHIsAndLabels(SuccBB->begin());
performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
# past block prologues which would overwrite their uses.
---
+# Make sure COPY to $sgpr9 is not sunk after S_AND_SAVEEXEC_B64, which overwrites the COPY's source $sgpr4.
name: _amdgpu_ps_main
alignment: 1
tracksRegLiveness: true
; GFX10-NEXT: successors: %bb.1(0x80000000)
; GFX10-NEXT: liveins: $sgpr4
; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
; GFX10-NEXT: renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GFX10-NEXT: liveins: $sgpr4:0x0000000000000003, $sgpr6, $sgpr0_sgpr1
+ ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.2
S_ENDPGM 0
...
+---
+# Make sure COPY to $sgpr0_sgpr1 is not sunk after S_AND_SAVEEXEC_B64, which reads $sgpr0_sgpr1.
+name: _amdgpu_ps_main2
+alignment: 1
+tracksRegLiveness: true
+registers: []
+liveins:
+ - { reg: '$sgpr4', virtual-reg: '' }
+ - { reg: '$sgpr6_sgpr7', virtual-reg: '' }
+body: |
+ ; GFX10-LABEL: name: _amdgpu_ps_main2
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr6_sgpr7
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
+ ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
+ ; GFX10-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
+ ; GFX10-NEXT: S_BRANCH %bb.1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6
+ ; GFX10-NEXT: S_BRANCH %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $sgpr4, $sgpr6_sgpr7
+
+ renamable $sgpr9 = COPY $sgpr4
+ renamable $vgpr5 = IMPLICIT_DEF
+ renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.8(0x40000000)
+ liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
+
+ $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ S_CBRANCH_EXECZ %bb.8, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.8(0x40000000)
+ liveins: $sgpr6
+
+ $m0 = COPY killed renamable $sgpr6
+ S_BRANCH %bb.8
+
+ bb.8:
+
+ S_ENDPGM 0
+
+...
+---
+# Make sure COPY to $sgpr2_sgpr3 is not sunk after S_AND_SAVEEXEC_B32, which defines $sgpr2.
+name: _amdgpu_ps_main3
+alignment: 1
+tracksRegLiveness: true
+registers: []
+liveins:
+ - { reg: '$sgpr6_sgpr7', virtual-reg: '' }
+ - { reg: '$sgpr8', virtual-reg: '' }
+body: |
+ ; GFX10-LABEL: name: _amdgpu_ps_main3
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr6_sgpr7, $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
+ ; GFX10-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; GFX10-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
+ ; GFX10-NEXT: S_BRANCH %bb.1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ ; GFX10-NEXT: S_NOP 0, implicit $sgpr2_sgpr3
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6
+ ; GFX10-NEXT: S_BRANCH %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $sgpr6_sgpr7, $sgpr8
+
+ renamable $vgpr5 = IMPLICIT_DEF
+ renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.8(0x40000000)
+ liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ $sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ S_NOP 0, implicit $sgpr2_sgpr3
+ S_CBRANCH_EXECZ %bb.8, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.8(0x40000000)
+ liveins: $sgpr6
+
+ $m0 = COPY killed renamable $sgpr6
+ S_BRANCH %bb.8
+
+ bb.8:
+
+ S_ENDPGM 0
+
+...