; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal -1
; GCN-NEXT: s_barrier_wait -1
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal -1
; GLOBAL-ISEL-NEXT: s_barrier_wait -1
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal 1
; GCN-NEXT: s_barrier_wait 1
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal 1
; GLOBAL-ISEL-NEXT: s_barrier_wait 1
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal 0
; GCN-NEXT: s_barrier_wait 0
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal 0
; GLOBAL-ISEL-NEXT: s_barrier_wait 0
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
; GCN-NEXT: v_sub_nc_u32_e32 v0, v2, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v3, v1, s[0:1]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal m0
; GCN-NEXT: s_barrier_wait 1
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal m0
; GLOBAL-ISEL-NEXT: s_barrier_wait 1
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: s_mov_b32 m0, s0
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal m0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GLOBAL-ISEL-NEXT: s_wait_bvhcnt 0x0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: v_readfirstlane_b32 m0, v0
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal m0
; GLOBAL-ISEL-NEXT: s_setpc_b64 s[30:31]
call void @llvm.amdgcn.s.barrier.signal.var(i32 %arg)
; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal_isfirst -1
; GCN-NEXT: s_cselect_b32 s3, s3, s5
; GCN-NEXT: s_cselect_b32 s2, s2, s4
; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst -1
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal_isfirst 1
; GCN-NEXT: s_cselect_b32 s3, s3, s5
; GCN-NEXT: s_cselect_b32 s2, s2, s4
; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst 1
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal_isfirst 1
; GCN-NEXT: s_cselect_b32 s3, s3, s5
; GCN-NEXT: s_cselect_b32 s2, s2, s4
; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst 1
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: s_mov_b32 m0, 1
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal_isfirst m0
; GCN-NEXT: s_cselect_b32 s3, s3, s5
; GCN-NEXT: s_cselect_b32 s2, s2, s4
; GLOBAL-ISEL-NEXT: s_mov_b32 m0, 1
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst m0
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: v_add_co_u32 v7, vcc_lo, v7, v9
; GCN-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo
; GCN-NEXT: global_store_b32 v[7:8], v10, off
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal_isfirst m0
; GCN-NEXT: s_cselect_b32 vcc_lo, -1, 0
; GCN-NEXT: v_dual_cndmask_b32 v2, v4, v2 :: v_dual_cndmask_b32 v3, v5, v3
; GLOBAL-ISEL-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo
; GLOBAL-ISEL-NEXT: v_mov_b32_e32 v9, 0
; GLOBAL-ISEL-NEXT: global_store_b32 v[7:8], v9, off
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst m0
; GLOBAL-ISEL-NEXT: s_cselect_b32 s0, 1, 0
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
+; GCN-NEXT: s_wait_storecnt 0x0
; GCN-NEXT: s_barrier_signal -1
; GCN-NEXT: s_barrier_wait -1
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
+; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
; GLOBAL-ISEL-NEXT: s_barrier_signal -1
; GLOBAL-ISEL-NEXT: s_barrier_wait -1
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]