SmallSet<Register, 16> Defs;
+ void collectUsedRegUnits(const MachineInstr &MI,
+ BitVector &UsedRegUnits) const;
+
bool isBundleCandidate(const MachineInstr &MI) const;
bool isDependentLoad(const MachineInstr &MI) const;
bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
return false;
}
+void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
+ BitVector &UsedRegUnits) const {
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.readsReg())
+ continue;
+
+ Register Reg = Op.getReg();
+ assert(!Op.getSubReg() &&
+ "subregister indexes should not be present after RA");
+
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ UsedRegUnits.set(*Units);
+ }
+}
+
bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
return false;
TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
+ BitVector KillUsedRegUnits(TRI->getNumRegUnits());
+
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::instr_iterator Next;
Next = std::next(BundleEnd);
if (ClauseLength > 1) {
Changed = true;
+
+ // Before register allocation, kills are inserted after potential soft
+ // clauses to hint register allocation. Look for kills that look like
+ // this, and erase them.
+ if (Next != E && Next->isKill()) {
+ MachineInstr &Kill = *Next;
+
+ // TODO: Should maybe back-propagate kill flags to the bundle.
+ for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
+ collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
+ collectUsedRegUnits(Kill, KillUsedRegUnits);
+
+ BundleUsedRegUnits.flip();
+ KillUsedRegUnits &= BundleUsedRegUnits;
+
+ // Erase the kill if it's a subset of the used registers.
+ //
+ // TODO: Should we just remove all kills? Is there any real reason to
+ // keep them after RA?
+ if (KillUsedRegUnits.none()) {
+ ++Next;
+ Kill.eraseFromParent();
+ }
+
+ BundleUsedRegUnits.reset();
+ KillUsedRegUnits.reset();
+ }
+
finalizeBundle(MBB, BundleStart, Next);
}
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
...
+
+# Before register allocation, KILL hints are inserted after potential soft
+# clauses to hint the register allocator to not clobber the input
+# registers. Kills that look like this should be erased.
+---
+name: post_bundle_kill
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+
+ ; GCN-LABEL: name: post_bundle_kill
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ KILL killed $vgpr3_vgpr4, killed $vgpr5_vgpr6
+...
+
+# Kill some other register not used in the bundle, should not be touched.
+---
+name: post_bundle_kill_other
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN-LABEL: name: post_bundle_kill_other
+ ; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: KILL killed $vgpr7
+ $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ KILL killed $vgpr7
+...
+
+# Kill some other register not used in the bundle, but also some
+# from the bundle.
+---
+name: post_bundle_kill_plus_other
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN-LABEL: name: post_bundle_kill_plus_other
+ ; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: KILL killed $vgpr7, killed $vgpr3
+ $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ KILL killed $vgpr7, killed $vgpr3
+...