; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-BACKOFF %s
; Subtargets that cannot back off from a barrier must wait for outstanding
; memory instructions before the barrier.
; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-BACKOFF-LABEL: back_off_barrier_no_fence:
+; GFX11-BACKOFF: ; %bb.0:
+; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
+; GFX11-BACKOFF-NEXT: s_barrier
+; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
+; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-BACKOFF-NEXT: s_setpc_b64 s[30:31]
%load = load i32, ptr %in
call void @llvm.amdgcn.s.barrier()
store i32 %load, ptr %out
; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-BACKOFF-LABEL: back_off_barrier_with_fence:
+; GFX11-BACKOFF: ; %bb.0:
+; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
+; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-BACKOFF-NEXT: s_barrier
+; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-BACKOFF-NEXT: buffer_gl0_inv
+; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
+; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-BACKOFF-NEXT: s_setpc_b64 s[30:31]
%load = load i32, ptr %in
fence syncscope("workgroup") release
call void @llvm.amdgcn.s.barrier()
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX11 %s
---
name: test_waitcnt_preexisting_vscnt_unmodified
; GFX10-NEXT: S_WAITCNT 112
; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX10-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_unmodified
+ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: S_WAITCNT 0
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: S_BARRIER
+ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_WAITCNT 7
+ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_ENDPGM 0
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
S_WAITCNT_VSCNT undef $sgpr_null, 0
S_BARRIER
; GFX10-NEXT: S_WAITCNT 112
; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX10-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_needs_vscnt
+ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: S_WAITCNT 0
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1
+ ; GFX11-NEXT: S_BARRIER
+ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_WAITCNT 7
+ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_ENDPGM 0
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
S_WAITCNT_VSCNT undef $sgpr_null, 1
S_BARRIER
; GFX10-NEXT: S_WAITCNT 112
; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX10-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_with_other_waitcnt
+ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: S_WAITCNT 0
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT 112
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: S_BARRIER
+ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_WAITCNT 7
+ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_ENDPGM 0
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
S_WAITCNT 112
S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: S_WAITCNT 112
; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX10-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_combined
+ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: S_WAITCNT 0
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: S_BARRIER
+ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_WAITCNT 7
+ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_ENDPGM 0
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
S_WAITCNT_VSCNT undef $sgpr_null, 0
S_WAITCNT_VSCNT undef $sgpr_null, 1
; GFX10-NEXT: S_WAITCNT 112
; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX10-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_combined_both_types
+ ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: S_WAITCNT 0
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT 0
+ ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1
+ ; GFX11-NEXT: S_BARRIER
+ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_WAITCNT 7
+ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: S_ENDPGM 0
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
S_WAITCNT 0
S_WAITCNT_VSCNT undef $sgpr_null, 1