One wait state is required between a write to M0 and a subsequent LDS DMA or LDS_DIRECT instruction that uses it.
Differential Revision: https://reviews.llvm.org/D124550
Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
}
+static bool isLdsDma(const MachineInstr &MI) { // Predicate: is MI treated as an LDS DMA instruction by the M0-read hazard checks?
+ return SIInstrInfo::isVALU(MI) && // must be classified as VALU ...
+ (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI)); // ... and also be MUBUF or FLAT. NOTE(review): presumably only LDS-DMA memory ops carry both flags - confirm against the instruction definitions.
+}
+
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return HazardType;
- if (ST.hasReadM0MovRelInterpHazard() &&
- (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
- checkReadM0Hazards(MI) > 0)
- return HazardType;
-
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
+ if (((ST.hasReadM0MovRelInterpHazard() &&
+ (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+ (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+ (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+ (ST.hasReadM0LdsDirectHazard() &&
+ MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
checkReadM0Hazards(MI) > 0)
return HazardType;
if (isRFE(MI->getOpcode()))
return std::max(WaitStates, checkRFEHazards(MI));
- if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
- isSMovRel(MI->getOpcode())))
- return std::max(WaitStates, checkReadM0Hazards(MI));
-
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
+ if ((ST.hasReadM0MovRelInterpHazard() &&
+ (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+ (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+ (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+ (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)))
return std::max(WaitStates, checkReadM0Hazards(MI));
if (SIInstrInfo::isMAI(*MI))
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { // Returns the required wait-state count minus the value reported by getWaitStatesSinceDef for M0; callers test the result with "> 0" or fold it in with std::max.
const SIInstrInfo *TII = ST.getInstrInfo();
- const int SMovRelWaitStates = 1;
+ const int ReadM0WaitStates = 1; // renamed from SMovRelWaitStates: the same constant now serves every M0 reader routed here, not only s_movrel
auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); }; // only SALU instructions are passed as qualifying M0 definitions
- return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
- SMovRelWaitStates);
+ return ReadM0WaitStates -
+ getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates); // NOTE(review): third argument looks like a search limit - confirm against getWaitStatesSinceDef
}
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
getGeneration() <= AMDGPUSubtarget::GFX9;
}
+ bool hasReadM0LdsDmaHazard() const { // Subtarget query gating the isLdsDma() branch of the M0-read hazard check.
+ return getGeneration() == AMDGPUSubtarget::GFX9; // true only when the generation is exactly GFX9
+ }
+
+ bool hasReadM0LdsDirectHazard() const { // Subtarget query gating the LDS_DIRECT-reader branch of the M0-read hazard check.
+ return getGeneration() == AMDGPUSubtarget::GFX9; // true only when the generation is exactly GFX9
+ }
+
bool hasVcmpxPermlaneHazard() const { // Accessor: reports the stored HasVcmpxPermlaneHazard flag unchanged.
return HasVcmpxPermlaneHazard;
}
S_SENDMSG 3, implicit $exec, implicit $m0
S_ENDPGM 0
...
+
+# GCN-LABEL: name: buffer_store_lds_dword
+# GCN: $m0 = S_MOV_B32 0
+# GFX9-NEXT: S_NOP 0
+# GCN-NEXT: BUFFER_STORE_LDS_DWORD
+---
+name: buffer_store_lds_dword
+body: |
+ bb.0:
+ $m0 = S_MOV_B32 0
+ BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: lds_direct_read_m0
+# GCN: $m0 = S_MOV_B32 0
+# GFX9-NEXT: S_NOP 0
+# GCN-NEXT: V_MOV_B32
+---
+name: lds_direct_read_m0
+body: |
+ bb.0:
+ $m0 = S_MOV_B32 0
+ $vgpr0 = V_MOV_B32_e32 $lds_direct, implicit $exec, implicit $m0
+...
--- /dev/null
+# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s
+
+# GCN-LABEL: name: buffer_load_dword_lds
+# GCN: $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_ADDR64
+---
+name: buffer_load_dword_lds
+body: |
+ bb.0:
+ $m0 = S_MOV_B32 0
+ BUFFER_LOAD_DWORD_LDS_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: buffer_store_lds_dword
+# GCN: $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: BUFFER_STORE_LDS_DWORD
+---
+name: buffer_store_lds_dword
+body: |
+ bb.0:
+ $m0 = S_MOV_B32 0
+ BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: global_load_lds_dword
+# GCN: $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: GLOBAL_LOAD_LDS_DWORD
+---
+name: global_load_lds_dword
+body: |
+ bb.0:
+ $m0 = S_MOV_B32 0
+ GLOBAL_LOAD_LDS_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: scratch_load_lds_dword
+# GCN: $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: SCRATCH_LOAD_LDS_DWORD
+---
+name: scratch_load_lds_dword
+body: |
+ bb.0:
+ $m0 = S_MOV_B32 0
+ SCRATCH_LOAD_LDS_DWORD $vgpr2, 0, 0, implicit $exec, implicit $m0
+...