[AMDGPU] Add test for no waitcnt before issuing LDS DMA. NFC.
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Mon, 16 May 2022 22:58:28 +0000 (15:58 -0700)
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Tue, 17 May 2022 17:45:55 +0000 (10:45 -0700)
A wait is only needed after the DMA before LDS can be read.

llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir

index 83f7fdf..499bd6a 100644 (file)
@@ -96,3 +96,24 @@ body:             |
     S_ENDPGM 0
 
 ...
+
+# No need to wait before load from VMEM to LDS.
+# GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read
+# GCN:      BUFFER_LOAD_DWORD_LDS_IDXEN
+# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
+# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
+# GCN-NEXT: S_WAITCNT 3952
+#                     vmcnt(0)
+# GCN-NEXT: DS_READ_B32_gfx9
+---
+name: series_of_buffer_load_dword_lds_ds_read
+body:             |
+  bb.0:
+    $m0 = S_MOV_B32 0
+    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `i32 addrspace(1)* undef`), (store (s32) into `i32 addrspace(3)* undef`)
+    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `i32 addrspace(1)* undef` + 4), (store (s32) into `i32 addrspace(3)* undef` + 4)
+    BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `i32 addrspace(1)* undef` + 8), (store (s32) into `i32 addrspace(3)* undef` + 8)
+    $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`)
+    S_ENDPGM 0
+
+...