TRI(TII.getRegisterInfo()),
ClauseUses(TRI.getNumRegUnits()),
ClauseDefs(TRI.getNumRegUnits()) {
- MaxLookAhead = 5;
+ MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
+ TSchedModel.init(&ST);
}
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
checkReadM0Hazards(MI) > 0)
return NoopHazard;
+ if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
+ return NoopHazard;
+
+ if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
+ return NoopHazard;
+
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
return NoopHazard;
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
+ if (SIInstrInfo::isMAI(*MI))
+ return std::max(WaitStates, checkMAIHazards(MI));
+
+ if (MI->mayLoad() || MI->mayStore())
+ return std::max(WaitStates, checkMAILdStHazards(MI));
+
return WaitStates;
}
return FPAtomicToDenormModeWaitStates -
::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}
+
+int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
+ assert(SIInstrInfo::isMAI(*MI));
+
+ int WaitStatesNeeded = 0;
+ unsigned Opc = MI->getOpcode();
+
+ auto IsVALUFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isVALU(*MI);
+ };
+
+ if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
+ const int LegacyVALUWritesVGPRWaitStates = 2;
+ const int VALUWritesExecWaitStates = 4;
+ const int MaxWaitStates = 4;
+
+ int WaitStatesNeededForUse = VALUWritesExecWaitStates -
+ getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded < MaxWaitStates) {
+ for (const MachineOperand &Use : MI->explicit_uses()) {
+ const int MaxWaitStates = 2;
+
+ if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+ continue;
+
+ int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
+ getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ break;
+ }
+ }
+ }
+
+ auto IsMFMAFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isMAI(*MI) &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
+ };
+
+ for (const MachineOperand &Op : MI->explicit_operands()) {
+ if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
+ continue;
+
+ if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
+ continue;
+
+ const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
+ const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
+ const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
+ const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
+ const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
+ const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
+ const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
+ const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
+ const int MaxWaitStates = 18;
+ unsigned Reg = Op.getReg();
+ unsigned HazardDefLatency = 0;
+
+ auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
+ (MachineInstr *MI) {
+ if (!IsMFMAFn(MI))
+ return false;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (DstReg == Reg)
+ return false;
+ HazardDefLatency = std::max(HazardDefLatency,
+ TSchedModel.computeInstrLatency(MI));
+ return TRI.regsOverlap(DstReg, Reg);
+ };
+
+ int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
+ MaxWaitStates);
+ int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
+ int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ int OpNo = MI->getOperandNo(&Op);
+ if (OpNo == SrcCIdx) {
+ NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
+ } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
+ break;
+ }
+ } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
+ break;
+ }
+ }
+
+ int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+
+ auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+ return false;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ return TRI.regsOverlap(Reg, DstReg);
+ };
+
+ const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
+ const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
+ const int AccVGPRWriteAccVgprReadWaitStates = 3;
+ NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
+ if (OpNo == SrcCIdx)
+ NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
+ else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
+ NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
+
+ WaitStatesNeededForUse = NeedWaitStates -
+ getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+ }
+
+ if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
+ const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
+ const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
+ const int MaxWaitStates = 13;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned HazardDefLatency = 0;
+
+ auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
+ (MachineInstr *MI) {
+ if (!IsMFMAFn(MI))
+ return false;
+ unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
+ HazardDefLatency = std::max(HazardDefLatency,
+ TSchedModel.computeInstrLatency(MI));
+ return TRI.regsOverlap(Reg, DstReg);
+ };
+
+ int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
+ int NeedWaitStates;
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
+ break;
+ }
+
+ int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
+ if (!ST.hasMAIInsts())
+ return 0;
+
+ int WaitStatesNeeded = 0;
+
+ auto IsAccVgprReadFn = [] (MachineInstr *MI) {
+ return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
+ };
+
+ for (const MachineOperand &Op : MI->explicit_uses()) {
+ if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
+ continue;
+
+ unsigned Reg = Op.getReg();
+
+ const int AccVgprReadLdStWaitStates = 2;
+ const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+ const int MaxWaitStates = 2;
+
+ int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
+ getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+
+ auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+ return false;
+ auto IsVALUFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
+ };
+ return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
+ std::numeric_limits<int>::max();
+ };
+
+ WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
+ getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
--- /dev/null
+# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: valu_write_vgpr_mfma_read
+# GCN: V_MOV_B32
+# GCN: V_MOV_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: valu_write_vgpr_mfma_read
+body: |
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
+# GCN: V_MOV_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: valu_write_vgpr_accvgpr_write_read
+body: |
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_write_agpr_mfma_read_same_agpr
+# GCN: V_MFMA
+# GCN-NEXT: V_MFMA
+name: mfma_write_agpr_mfma_read_same_agpr
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: mfma_write_agpr_mfma_read_overlap
+body: |
+ bb.0:
+ $agpr1_agpr2_agpr3_agpr4 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: mfma_write_agpr_mfma_read_partial
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: mfma_write_agpr_mfma_srca_read_overlap
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $agpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: mfma_write_agpr_mfma_srcb_read_overlap
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
+# GCN: V_MFMA_F32_4X4X1F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_READ_B32
+name: mfma_4x4_write_agpr_accvgpr_read
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
+# GCN: V_MFMA_F32_16X16X1F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_READ_B32
+name: mfma_16x16_write_agpr_accvgpr_read
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
+# GCN: V_MFMA_F32_32X32X2F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_READ_B32
+name: mfma_32x32_write_agpr_accvgpr_read
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_write
+# GCN: V_MFMA_F32_4X4X1F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: mfma_4x4_write_agpr_accvgpr_write
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
+# GCN: V_MFMA_F32_16X16X1F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: mfma_16x16_write_agpr_accvgpr_write
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
+# GCN: V_MFMA_F32_32X32X2F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: mfma_32x32_write_agpr_accvgpr_write
+body: |
+ bb.0:
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_4x4_read_srcc_accvgpr_write
+# GCN: V_MFMA_F32_4X4X1F32
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: mfma_4x4_read_srcc_accvgpr_write
+body: |
+ bb.0:
+ $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
+# GCN: V_MFMA_F32_16X16X1F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: mfma_16x16_read_srcc_accvgpr_write
+body: |
+ bb.0:
+ $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
+...
+---
+
+# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
+# GCN: V_MFMA_F32_32X32X2F32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: mfma_32x32_read_srcc_accvgpr_write
+body: |
+ bb.0:
+ $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_read_write_vgpr_valu_read
+# GCN: V_ACCVGPR_READ_B32
+# GCN-NEXT: V_ADD_F32
+name: accvgpr_read_write_vgpr_valu_read
+body: |
+ bb.0:
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec
+ $vgpr1 = V_ADD_F32_e32 0, killed $vgpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
+# GCN: V_ACCVGPR_READ_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: accvgpr_read_write_vgpr_mfma_read
+body: |
+ bb.0:
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec
+ $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr0, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
+# GCN: V_ACCVGPR_READ_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: accvgpr_read_write_vgpr_accvgpr_write_read
+body: |
+ bb.0:
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcc
+# GCN: V_ACCVGPR_WRITE_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: accvgpr_write_agpr_mfma_read_srcc
+body: |
+ bb.0:
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+ $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr2, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
+# GCN: V_ACCVGPR_WRITE_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: accvgpr_write_agpr_mfma_read_srca
+body: |
+ bb.0:
+ $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+ $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
+# GCN: V_ACCVGPR_WRITE_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: accvgpr_write_agpr_mfma_read_srcb
+body: |
+ bb.0:
+ $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+ $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr8, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
+# GCN: V_ACCVGPR_WRITE_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_READ_B32
+name: accvgpr_write_agpr_accvgpr_read
+body: |
+ bb.0:
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+ $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: vcmpx_write_exec_mfma
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: vcmpx_write_exec_mfma
+body: |
+ bb.0:
+ implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
+ $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: vcmpx_write_exec_accvgpr_write
+body: |
+ bb.0:
+ implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
+ $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
+...
+---
+
+# GCN-LABEL: name: accvgpr_read_write_vgpr_load
+# GCN: V_ACCVGPR_READ_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_LOAD_DWORD
+name: accvgpr_read_write_vgpr_load
+body: |
+ bb.0:
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
+# GCN-LABEL: name: accvgpr_read_write_vgpr_store
+# GCN: V_ACCVGPR_READ_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: DS_WRITE_B32
+name: accvgpr_read_write_vgpr_store
+body: |
+ bb.0:
+ $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+ DS_WRITE_B32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_no_dependency
+# GCN: V_MOV_B32
+# GCN-NEXT: V_ACCVGPR_READ_B32
+# GCN-NEXT: FLAT_LOAD_DWORD
+name: valu_write_vgpr_accvgpr_read_load_no_dependency
+body: |
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
+# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend
+# GCN: V_MOV_B32
+# GCN-NEXT: V_ACCVGPR_READ_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_LOAD_DWORD
+name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend
+body: |
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
+# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
+# GCN: V_MOV_B32
+# GCN-NEXT: V_ACCVGPR_READ_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_LOAD_DWORD
+name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
+body: |
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+...
+---