[AMDGPU] Increase hazard for store dwordx3/4 to 2 waitstates on gfx940
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Wed, 13 Apr 2022 16:27:09 +0000 (09:27 -0700)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Wed, 13 Apr 2022 21:21:45 +0000 (14:21 -0700)
Fixes: SWDEV-327053
Differential Revision: https://reviews.llvm.org/D123687

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/test/CodeGen/AMDGPU/gfx940-hazards.mir
llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

index f61d71b02b7a6ac7d0dc8c09d7b9653a6be1ad61..7f5bc9af66196c2b47653adc61dbde6401b19d0f 100644 (file)
@@ -794,7 +794,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
   // 8 bytes can have there store data over written by the next instruction.
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
-  const int VALUWaitStates = 1;
+  const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1;
   int WaitStatesNeeded = 0;
 
   if (!TRI->isVectorRegister(MRI, Def.getReg()))
index 7f84babc72c21016fb3ef6c27f8a5b322e8143e5..0b5cc6c3552d2bb877951c14c653b0c78e2ae776 100644 (file)
@@ -215,3 +215,29 @@ body:             |
     $vgpr1 = V_ADD_CO_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $exec
     $sgpr1 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
 ...
+
+# GCN-LABEL: name: global_store_dwordx4_data_hazard
+# GCN:      GLOBAL_STORE_DWORDX4
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MOV_B32_e32
+name: global_store_dwordx4_data_hazard
+
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+...
+
+# GCN-LABEL: name: global_store_dwordx3_data_hazard
+# GCN:      GLOBAL_STORE_DWORDX3
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MOV_B32_e32
+name: global_store_dwordx3_data_hazard
+
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+...
index d335cf73f510a3913ab2e12625cdc9ea3b746365..91dcfd7a6b3ec2b45e7fe8adafbf88fa0c69345f 100644 (file)
@@ -1155,7 +1155,7 @@ body:             |
 ...
 # GCN-LABEL: name: flat_store_data_agpr_overwritten
 # GCN:      FLAT_STORE_DWORDX4
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
 # GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
 name: flat_store_data_agpr_overwritten
 body: |