// \returns the number of address arguments from which to enable MIMG NSA
// on supported architectures.
unsigned getNSAThreshold(const MachineFunction &MF) const;
+
+ // \returns true if the subtarget has a hazard requiring an "s_nop 0"
+ // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
+ // The result is currently unconditional: no subtarget state is consulted.
+ bool requiresNopBeforeDeallocVGPRs() const {
+ // Currently all targets that support the dealloc VGPRs message also require
+ // the nop.
+ return true;
+ }
};
} // end namespace llvm
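// Insert DEALLOC_VGPRS messages before previously identified S_ENDPGM
// instructions, preceded by an "s_nop 0" when the subtarget reports that
// the nop is required before the message.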
for (MachineInstr *MI : ReleaseVGPRInsts) {
+ if (ST->requiresNopBeforeDeallocVGPRs()) {
+ BuildMI(*MI->getParent(), MI, DebugLoc(), TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ }
BuildMI(*MI->getParent(), MI, DebugLoc(), TII->get(AMDGPU::S_SENDMSG))
.addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
Modified = true;
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 seq_cst, align 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v2, v0, s[2:3]
; GFX11-NEXT: global_store_b32 v2, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 seq_cst, align 8
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 seq_cst, align 8
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v3, v0, s[2:3]
; GFX11-NEXT: global_store_b64 v3, v[1:2], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 42 seq_cst, align 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v2, v0, s[2:3]
; GFX11-NEXT: global_store_b32 v2, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 42 seq_cst, align 8
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 42 seq_cst, align 8
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v3, v0, s[2:3]
; GFX11-NEXT: global_store_b64 v3, v[1:2], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v1, v2, s[0:1]
; GFX11-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result0 = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 seq_cst, align 4
; GFX11-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s15, vcc_lo
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_movrels_b32_e32 v16, v0
; GFX11-NEXT: v_movrels_b32_e32 v17, v1
; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1]
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_cselect_b32 s2, 4.0, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3]
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
; GFX11-NEXT: v_minmax_f32 v1, v1, v2, v4
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, |v2|, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, v2, s1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_lshl_or_b32 v2, v2, s1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, s0, v3, v2
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, s0, v2, v3
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, v3, v4, v2
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, v3, s0, v2
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, v4, v2, v3
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i16>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v5, s0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v5, s0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
; GFX11-NEXT: v_mov_b32_e32 v3, s3
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v6, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s5, 3
; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc_lo
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v5, v9, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v6, v9, s1
; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v5, v9, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v6, v9, s1
; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v3, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1
; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off
; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: global_store_b128 v[10:11], v[6:9], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off
; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off
; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off
; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[11:12], v[0:3], off
; GFX11-NEXT: global_store_b128 v[13:14], v[4:7], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[0:1], v[3:6], off
; GFX11-NEXT: global_store_b128 v[11:12], v[7:10], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v[12:13], v[0:3], off
; GFX11-NEXT: global_store_b128 v[14:15], v[4:7], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i16>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, v2, s1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i8>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_lshl_or_b32 v2, v2, s1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, s0, v3, v2
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, s0, v2, v3
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, v3, v4, v2
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_or_b32 v2, v3, s0, v2
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, v4, v2, v3
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <4 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v5, s0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v5, s0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <8 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
; GFX11-NEXT: v_mov_b32_e32 v3, s3
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v6, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(1 ) %ptr
; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s5, 3
; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc_lo
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(4) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v5, v9, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v6, v9, s1
; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v5, v9, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v6, v9, s1
; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, v3, s0
; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1
; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <16 x i8>, ptr addrspace(1) %ptr
; GFX11-NEXT: global_store_b128 v64, v[56:59], s[2:3] offset:224
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v64, v[60:63], s[2:3] offset:240
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[13:16], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[15:18], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s5, v0, v1
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_div_fmas_f32 v0, s5, v0, v1
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX11_W32-NEXT: v_div_fmas_f32 v0, 1.0, s4, v0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX11_W64-NEXT: v_div_fmas_f32 v0, 1.0, s4, v0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d)
; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, 1.0, v0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, 1.0, v0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d)
; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, 1.0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, 1.0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d)
; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
; GFX11_W32-NEXT: v_mov_b32_e32 v2, 0
; GFX11_W32-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
; GFX11_W64-NEXT: v_mov_b32_e32 v2, 0
; GFX11_W64-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d)
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%cmp = icmp eq i32 %i, 0
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false)
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true)
; GFX11_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v1
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] offset:8
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v1
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] offset:8
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8
+; GFX11_W32-NEXT: s_nop 0
; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8
+; GFX11_W64-NEXT: s_nop 0
; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_div_scale_f32 v0, null, v0, v0, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_div_scale_f32 v0, null, v1, v0, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, v[2:3], v[2:3], v[0:1]
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, v0, v0, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, v0, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, s0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, v0, s0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], v[0:1], s[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], v[0:1], s[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], s[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, s3, s3, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, s2, s3, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, s[4:5], s[4:5], s[2:3]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f64 v[0:1], null, s[2:3], s[4:5], s[2:3]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, v0, v0, 1.0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, 2.0, 2.0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_div_scale_f32 v0, null, v0, v0, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_div_scale_f32 v0, null, v0, v0, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, s0, 0x41000000
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, 0x41000000, 0x41000000, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, s0, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.f32.i32(float %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v2f32.i32(<2 x float> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:4], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v3f32.i32(<3 x float> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %in, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %in, i32 2, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %in, i32 4, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %in, i32 8, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %in, i32 6, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: image_store v0, v[1:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
tail call void @llvm.amdgcn.image.store.2d.f32.i32(float %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB1_2: ; %bb1
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB1_2: ; %bb1
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 ; encoding: [0x80,0x00,0x10,0xca,0x02,0x00,0x00,0x01]
; GFX11-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; encoding: [0x00,0x00,0x6a,0xdc,0x01,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0
; GFX11-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0x11]
; GFX11-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x02,0x7e,0x01,0x01,0x00,0x11]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; encoding: [0x00,0x00,0x6e,0xdc,0x02,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
%tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 false) #0
; GFX11-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false)
; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX11-NEXT: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX11-NEXT: global_store_b64 v4, v[2:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[2:3] offset:16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load <8 x i32>, ptr addrspace(4) %ptr, align 1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add3_u32 v5, v5, v0, v1
; GFX11-NEXT: global_store_b64 v6, v[4:5], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v3, v3, v0
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v3, v3, v0
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v3, v3, v0
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mul_lo_u32 v1, v1, v2
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_mul_lo_u32 v1, v0, v2
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_add3_u32 v1, v1, v3, v2
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v5, v3, vcc_lo
; GFX11-NEXT: global_store_b32 v[2:3], v1, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: v_mul_i32_i24_e32 v1, -7, v1
; GFX11-NEXT: v_lshlrev_b64 v[1:2], 3, v[1:2]
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v0, v1, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v1, s2, s0
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load <2 x i16>, ptr addrspace(4) %in0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v1, s2, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load <2 x i16>, ptr addrspace(4) %in0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v1, s2, s3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add <2 x i16> %a, %b
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v0, 0x1c8007b, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v0, 0xfc21fcb3, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v0, v0, 32
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_u16 v0, 0x3f80, v0 op_sel:[1,0] op_sel_hi:[0,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_alignbit_b32 v2, 0, v0, 16
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-NEXT: global_store_b128 v1, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_ashrrev_i32_e32 v1, 16, v0
; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_mad_u64_u32 v[1:2], null, s6, v0, s[2:3]
; GFX11W64-NEXT: v_mov_b32_e32 v0, 0
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mad_u64_u32 v[1:2], null, s2, v0, s[4:5]
; GFX11W32-NEXT: v_mov_b32_e32 v0, 0
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_add_nc_u32_e32 v1, s2, v1
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v1, s2, v1
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_add_nc_u32_e32 v1, s2, v1
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v1, s2, v1
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W64-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W32-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_sub_nc_u32_e32 v1, s2, v1
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_sub_nc_u32_e32 v1, s2, v1
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX1164-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mad_u64_u32 v[1:2], null, s8, v0, s[0:1]
; GFX1164-NEXT: buffer_store_b32 v1, off, s[4:7], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mad_u64_u32 v[1:2], null, s0, v0, s[2:3]
; GFX1132-NEXT: buffer_store_b32 v1, off, s[4:7], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: v_add_nc_u32_e32 v0, s2, v1
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_add_nc_u32_e32 v0, s2, v1
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mov_b32_e32 v1, v3
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mov_b32_e32 v1, v3
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s0, v0
; GFX1164-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s0, v0
; GFX1132-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v1
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v1
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: v_mov_b32_e32 v1, v5
; GFX1164-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s1, v1, vcc
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_mov_b32_e32 v1, v5
; GFX1132-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b32 v1, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_mad_u64_u32 v[1:2], null, s2, v0, s[4:5]
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b32 v1, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mov_b32_e32 v1, v3
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mov_b32_e32 v1, v3
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: v_mov_b32_e32 v1, v5
; GFX1164-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s4, v1, vcc
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: v_mov_b32_e32 v1, v5
; GFX1132-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: s_mov_b32 s2, -1
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: s_mov_b32 s2, -1
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: ; %bb.5: ; %if
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX1164-NEXT: .LBB0_6: ; %UnifiedReturnBlock
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: ; %bb.5: ; %if
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX1132-NEXT: .LBB0_6: ; %UnifiedReturnBlock
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX1164-NEXT: ; %bb.5: ; %if
; GFX1164-NEXT: buffer_store_b32 v4, off, s[0:3], 0
; GFX1164-NEXT: .LBB1_6: ; %UnifiedReturnBlock
+; GFX1164-NEXT: s_nop 0
; GFX1164-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1164-NEXT: s_endpgm
;
; GFX1132-NEXT: ; %bb.5: ; %if
; GFX1132-NEXT: buffer_store_b32 v4, off, s[0:3], 0
; GFX1132-NEXT: .LBB1_6: ; %UnifiedReturnBlock
+; GFX1132-NEXT: s_nop 0
; GFX1132-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1132-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_mad_u64_u32 v[1:2], null, s6, v0, s[2:3]
; GFX11W64-NEXT: v_mov_b32_e32 v0, 0
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mad_u64_u32 v[1:2], null, s2, v0, s[4:5]
; GFX11W32-NEXT: v_mov_b32_e32 v0, 0
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_add_nc_u32_e32 v1, s2, v1
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v1, s2, v1
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W64-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W32-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_sub_nc_u32_e32 v1, s2, v1
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_sub_nc_u32_e32 v1, s2, v1
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_mad_u64_u32 v[1:2], null, s6, v0, s[2:3]
; GFX11W64-NEXT: v_mov_b32_e32 v0, 0
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mad_u64_u32 v[1:2], null, s2, v0, s[4:5]
; GFX11W32-NEXT: v_mov_b32_e32 v0, 0
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_add_nc_u32_e32 v1, s2, v1
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v1, s2, v1
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_mov_b32_e32 v0, 0
; GFX11W64-NEXT: s_waitcnt vmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mov_b32_e32 v0, 0
; GFX11W32-NEXT: s_waitcnt vmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W64-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W64-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11W32-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11W32-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_sub_nc_u32_e32 v1, s2, v1
; GFX11W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_sub_nc_u32_e32 v1, s2, v1
; GFX11W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11W64-NEXT: v_mov_b32_e32 v0, 0
; GFX11W64-NEXT: s_waitcnt vmcnt(0)
; GFX11W64-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11W64-NEXT: s_nop 0
; GFX11W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W64-NEXT: s_endpgm
;
; GFX11W32-NEXT: v_mov_b32_e32 v0, 0
; GFX11W32-NEXT: s_waitcnt vmcnt(0)
; GFX11W32-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11W32-NEXT: s_nop 0
; GFX11W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11W32-NEXT: s_endpgm
entry:
; GFX11-FLAT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FLAT-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-FLAT-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1]
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX11-GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
; GFX11-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v0, v0
; GFX11-FLAT-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1]
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v1, v1
; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %valptr
; GFX11-FLAT-NEXT: v_mov_b32_e32 v0, s2
; GFX11-FLAT-NEXT: s_mov_b32 s2, -1
; GFX11-FLAT-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
; GFX11-FLAT-NEXT: s_waitcnt vmcnt(0)
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v0, v0
; GFX11-FLAT-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v0, v0
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-FLAT-NEXT: s_mov_b32 s4, s0
; GFX11-FLAT-NEXT: s_mov_b32 s5, s1
; GFX11-FLAT-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v1, v1
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v0, v0
; GFX11-FLAT-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v0, v0
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v1, v1
; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-FLAT-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-FLAT-NEXT: s_mov_b32 s2, -1
; GFX11-FLAT-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v2, v0
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v1, v1
; GFX11-FLAT-NEXT: buffer_store_b64 v[1:2], off, s[0:3], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v2, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-FLAT-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-FLAT-NEXT: s_mov_b32 s2, -1
; GFX11-FLAT-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-GISEL-NEXT: global_store_b128 v4, v[0:3], s[8:9]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v2, v0
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v1, v1
; GFX11-FLAT-NEXT: buffer_store_b128 v[1:4], off, s[0:3], 0
+; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v7, v2
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: global_store_b128 v0, v[4:7], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_cbranch_vccnz .LBB0_2
; GFX11-NEXT: ; %bb.1: ; %one
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB0_2: ; %two
; GFX11-NEXT: buffer_store_b16 v1, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s2, s6
; GFX11-NEXT: s_mov_b32 s3, s7
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s2, s6
; GFX11-NEXT: s_mov_b32 s3, s7
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_perm_b32 v0, 0, s2, 0x10203
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load i32, ptr addrspace(1) %in, align 4
; GFX11-NEXT: v_perm_b32 v1, 0, s5, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s4, 0x10203
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x i32>, ptr addrspace(1) %in, align 8
; GFX11-NEXT: v_perm_b32 v1, 0, s5, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s4, 0x10203
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <4 x i32>, ptr addrspace(1) %in, align 16
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[8:11], 0 offset:16
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <8 x i32>, ptr addrspace(1) %in, align 32
; GFX11-NEXT: v_perm_b32 v1, 0, s4, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s5, 0x10203
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load i64, ptr addrspace(1) %in, align 8
; GFX11-NEXT: v_perm_b32 v1, 0, s4, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s5, 0x10203
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x i64>, ptr addrspace(1) %in, align 16
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[8:11], 0 offset:16
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <4 x i64>, ptr addrspace(1) %in, align 32
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = call float @coldcc(float 1.0)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = call float @fastcc(float 1.0)
; GFX11: ; %bb.0:
; GFX11-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add <2 x i16> %arg0, <i16 1, i16 1>
; GFX11: ; %bb.0:
; GFX11-NEXT: v_pk_sub_u16 v0, s0, -1 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add <2 x i16> %arg0, <i16 1, i16 1>
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add <3 x i32> %arg0, <i32 1, i32 2, i32 3>
; GFX11-NEXT: v_add_f32_e64 v1, s1, 2.0
; GFX11-NEXT: v_add_f32_e64 v0, s0, 1.0
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd <3 x float> %arg0, <float 1.0, float 2.0, float 4.0>
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add <5 x i32> %arg0, <i32 1, i32 2, i32 3, i32 4, i32 5>
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd <5 x float> %arg0, <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>
; GFX11-NEXT: v_add_nc_u32_e32 v1, 2, v1
; GFX11-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add <3 x i32> %arg0, <i32 1, i32 2, i32 3>
; GFX11-NEXT: v_dual_add_f32 v2, 4.0, v2 :: v_dual_add_f32 v1, 2.0, v1
; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0
; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd <3 x float> %arg0, <float 1.0, float 2.0, float 4.0>
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add <5 x i32> %arg0, <i32 1, i32 2, i32 3, i32 4, i32 5>
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd <5 x float> %arg0, <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_nc_u16 v0, v0, v0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add i16 %arg0, %arg0
; GFX11-NEXT: s_add_i32 s0, s0, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = add i16 %arg0, %arg0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, 0x1234, s2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, 0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, 0x1234, 0, s2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: .LBB16_3:
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB16_4:
; GFX11-NEXT: scratch_load_b32 v1, off, off offset:6
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f32_e64 v1, v1, 1.0 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1]
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f32_e64 v1, v1, 1.0 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f32_e32 v1, 0, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f32_e64 v1, v1, 1.0 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f16_e64 v1, v1, 1.0 clamp
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f16_e64 v1, v1, 1.0 clamp
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 clamp
; GFX11-NEXT: v_add_f32_e64 v1, v1, 1.0 clamp
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 clamp
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_add_f32_e64 v2, v2, v1 clamp
; GFX11-NEXT: v_add_f32_e32 v1, v2, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v1, v1, 1.0 op_sel_hi:[1,0] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v1, v1, 1.0 op_sel_hi:[1,0] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 neg_hi:[1,1] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 op_sel:[1,1] op_sel_hi:[0,0] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_maxmin_f32 v1, v1, 0x80000000, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_maxmin_f32 v1, v1, 0x80000000, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1]
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1 clamp
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| clamp
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] clamp
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] clamp
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], -|v[0:1]|, -|v[0:1]| clamp
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, 0x80000000, 1.0, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 1.0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0.5 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0x7fffff :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 0, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f32_e64 v1, v1, 0.5 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 0, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 0, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, 0, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, 1.0, 0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, 0, v1, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, 1.0, v1, 0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0x7fc00000 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_dual_mov_b32 v1, 0x7f800001 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_pk_max_f16 v1, v1, 2.0
; GFX11-NEXT: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_pk_max_f16 v1, v1, 0
; GFX11-NEXT: v_pk_min_f16 v1, v1, 1.0 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 neg_hi:[1,1] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 op_sel:[1,1] op_sel_hi:[0,0] clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f32_e64 v0, v0, v1 clamp
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1] offset:12
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
{
; GFX11-NEXT: v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v5, v5, v9
; GFX11-NEXT: v_dual_add_f32 v4, v4, v8 :: v_dual_add_f32 v3, v3, v7
; GFX11-NEXT: image_store v[2:5], v[0:1], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_dual_add_f32 v5, v5, v9 :: v_dual_add_f32 v4, v4, v8
; GFX11-NEXT: v_dual_add_f32 v3, v3, v7 :: v_dual_add_f32 v2, v2, v6
; GFX11-NEXT: image_store v[2:5], v[0:1], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_dual_add_f32 v5, v5, v9 :: v_dual_add_f32 v4, v4, v8
; GFX11-NEXT: v_dual_add_f32 v3, v3, v7 :: v_dual_add_f32 v2, v2, v6
; GFX11-NEXT: image_store v[2:5], v[0:1], s[12:19] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_min_u32 s2, s2, 32
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_u32_e32 v0, 32, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_min_u32_e32 v1, 32, v1
; GFX11-NEXT: v_min_u32_e32 v0, 32, v0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_min_u32_e32 v1, 32, v1
; GFX11-NEXT: v_min_u32_e32 v0, 32, v0
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u16 v1, v1, -8
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load i8, ptr addrspace(1) %valptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_min3_u32 v0, v0, s2, 64
; GFX11-NEXT: global_store_b64 v1, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_min3_u32 v0, v0, s2, 64
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
; GFX11-NEXT: v_min3_u32 v0, v0, v1, 64
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_add_nc_u32_e64 v1, v1, 32 clamp
; GFX11-NEXT: v_min3_u32 v1, v1, v2, 64
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_clz_i32_u32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_clz_i32_u32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_clz_i32_u32_e32 v0, v0
; GFX11-NEXT: global_store_b8 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %valptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x7f, v0
; GFX11-NEXT: global_store_b8 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b128 v5, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v6, v[0:3], s[0:1]
; GFX11-NEXT: global_store_b32 v6, v4, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX11-NEXT: global_store_b32 v4, v5, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b96 v8, v[4:6], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v10, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v10, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b128 v5, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_ubyte3_e32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, v0
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-NEXT: global_store_b32 v2, v0, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: global_store_b8 v[0:1], v3, off
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: global_store_b8 v[0:1], v1, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tmp = insertelement <2 x i16> undef, i16 0, i32 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tmp = insertelement <2 x i16> undef, i16 %a, i32 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tmp = insertelement <2 x half> undef, half %a, i32 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%shift = lshr i32 %b, 16
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%shift_a = lshr i32 %a, 16
; GFX11-NEXT: ds_load_u16_d16 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_div_fmas_f32 v4, s0, s0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_div_fmas_f32 v4, s0, s0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: v_div_fmas_f32 v5, s0, s0, s0
; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 10
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1]
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 offset:20
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x half>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x half>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: buffer_store_b16 v1, off, s[4:7], 0 offset:2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%p0 = extractelement <3 x half> %foo, i32 0
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%p0 = extractelement <3 x half> %foo, i32 %idx
; GFX11-NEXT: global_load_b32 v1, v1, s[2:3] offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshrrev_b64 v[1:2], v3, v[1:2]
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load <16 x half>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load <16 x half>, ptr addrspace(4) %ptr
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%bc= bitcast i16 %in to half
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call half @llvm.fabs.f16(half %in)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mul_f16_e64 v1, |s2|, s3
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call half @llvm.fabs.f16(half %in0)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%bc = bitcast i32 %in to <2 x half>
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_mul_f16 v0, v1, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_mul_f16 v0, v0, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b16 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_d16_hi_b16 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f16_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f16_e32 v0, 2.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v0, 0x40003c00, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v0, 0x3c004000, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep.r = getelementptr i16, ptr addrspace(1) %r, i64 1
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep.r = getelementptr i16, ptr addrspace(1) %r, i64 1
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep.r = getelementptr i16, ptr addrspace(1) %r, i64 1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half undef)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, s2, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = bitcast i16 %val.arg to half
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, |v1|, |v1|
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %out
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffff8000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half -0.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 1.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffffbc00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half -1.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x4c00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 16.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffff83ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffff83ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7c00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, s2, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = bitcast i32 %val.arg to <2 x half>
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x80008000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c003c00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xbc00bc00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x4c004c00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff03ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff03ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x83ff83ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x83ff83ff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7c007c00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e007e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e007e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e007e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e007e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e007e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7e007e00
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, s2, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float %val)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %out
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float undef)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float 0.0)
; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float -0.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 1.0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float 1.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, -1.0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float -1.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x41800000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float 16.0)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fffff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x807fffff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load double, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double %val)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], |v[0:1]|, |v[0:1]|
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load double, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], -|v[0:1]|, -|v[0:1]|
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load double, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load double, ptr addrspace(1) %out
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double 0.0)
; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double -0.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double 1.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xbff00000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double -1.0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40300000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double 16.0)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
; GFX11-NEXT: v_mov_b32_e32 v0, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
; GFX11-NEXT: v_mov_b32_e32 v0, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e32 v1, v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_max_f16_e32 v1, v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, s0
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_eq_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_le_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_gt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_lg_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nge_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_neq_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_eq_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_le_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_gt_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_lg_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nge_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nle_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_neq_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_mov_b32_e32 v0, s3
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, s2, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%out = call half @llvm.copysign.f16(half %mag, half %sign)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half %mag, half 0.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half %mag, half 1.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half %mag, half 10.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half %mag, half -1.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half %mag, half -10.0)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e64 v1, 0xffff8000, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half 0.0, half %sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v0, 0x3c00, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half 1.0, half %sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v0, 0x4900, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half 10.0, half %sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v0, 0x3c00, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half -1.0, half %sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v0, 0x4900, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call half @llvm.copysign.f16(half -10.0, half %sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v1, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v1
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v1
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v2
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11-NEXT: global_store_b16 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11-NEXT: global_store_b16 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v1, v0
; GFX11-NEXT: global_store_b16 v2, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_or_b32_e32 v0, s1, v0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, s0
; GFX11-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%mag.trunc = fptrunc double %mag to half
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%out = call <2 x half> @llvm.copysign.v2f16(<2 x half> %arg_mag, <2 x half> %arg_sign)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v3, v2, s[0:1] offset:4
; GFX11-NEXT: global_store_b32 v3, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%out = call <3 x half> @llvm.copysign.v3f16(<3 x half> %arg_mag, <3 x half> %arg_sign)
; GFX11-NEXT: v_lshl_or_b32 v1, v2, 16, v0
; GFX11-NEXT: v_lshl_or_b32 v0, v3, 16, v4
; GFX11-NEXT: global_store_b64 v5, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%out = call <4 x half> @llvm.copysign.v4f16(<4 x half> %arg_mag, <4 x half> %arg_sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float %mag, float %sign)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float %mag, float 0.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float %mag, float 1.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float %mag, float 10.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float %mag, float -1.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float %mag, float -10.0)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float 0.0, float %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 1.0
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float 1.0, float %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 0x41200000
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float 10.0, float %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 1.0
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float -1.0, float %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 0x41200000
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call float @llvm.copysign.f32(float -10.0, float %sign)
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s5, v0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, s4, v2
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, s4, v3
; GFX11-NEXT: global_store_b96 v4, v[0:2], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <3 x float> @llvm.copysign.v3f32(<3 x float> %mag, <3 x float> %sign)
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s5, v4
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, s4, v5
; GFX11-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sign.trunc = fptrunc double %sign to float
; GFX11-NEXT: s_or_b32 s2, s2, 1.0
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sign.trunc = fptrunc double %sign to float
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sign.ext = fpext half %sign to float
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sign.ext = fpext half %sign to float
; GFX11-NEXT: v_mov_b32_e32 v0, s3
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sign.ext = fpext bfloat %sign to float
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s5, v0
; GFX11-NEXT: v_mov_b32_e32 v0, s4
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double %mag, double %sign)
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double %mag, double 0.0)
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double %mag, double 1.0)
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double %mag, double 10.0)
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double %mag, double -1.0)
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double %mag, double -10.0)
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s3, v0
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sign.ext = fpext float %sign to double
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s3, v0
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sign.ext = fpext half %sign to double
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double 0.0, double %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 0x3ff00000
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double 1.0, double %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 0x40240000
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double 10.0, double %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 0x3ff00000
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double -1.0, double %sign)
; GFX11-NEXT: s_or_b32 s2, s2, 0x40240000
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.copysign.f64(double -10.0, double %sign)
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s5, v2
; GFX11-NEXT: v_mov_b32_e32 v2, s6
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v6, v[4:5], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <3 x double> @llvm.copysign.v3f64(<3 x double> %mag, <3 x double> %sign)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
; GFX11-NEXT: v_fma_mixlo_f16 v3, v1, v3, 0 op_sel_hi:[1,0,0]
; GFX11-NEXT: v_div_fixup_f16 v1, v3, v2, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_rcp_f16_e64 v1, |v1|
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_rcp_f16_e64 v1, -v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_rsq_f16_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_rcp_f16_e64 v1, -v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f16_e32 v1, v1, v2
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f16_e32 v1, v1, v2
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_mul_f16_e32 v0, 0.5, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x = load half, ptr addrspace(1) undef
; GFX11-NEXT: v_mul_f16_e32 v0, 0x2e66, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x = load half, ptr addrspace(1) undef
; GFX11-NEXT: v_mul_f16_e32 v0, 0xae66, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x = load half, ptr addrspace(1) undef
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_div_fixup_f32 v0, v0, s3, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_div_fixup_f32 v0, v0, s3, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mul_f32_e32 v1, s2, v1
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_div_fixup_f32 v0, v0, s3, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_div_fixup_f32 v0, v0, s3, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: v_div_fixup_f32 v1, v1, s7, s5
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_div_fixup_f32 v3, v3, s7, s3
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[8:9]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[8:9]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[8:9]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_div_fixup_f32 v0, v0, s2, 1.0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_div_fixup_f32 v0, v0, s2, 1.0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-NEXT: global_store_b64 v6, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v8, v[0:1], s[0:1] offset:8 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-NEXT: global_store_b64 v6, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX11-NEXT: global_store_b64 v6, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v8, v[0:1], s[0:1] offset:8 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
; GFX11-NEXT: global_store_b64 v6, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v8, v[0:1], s[0:1] offset:8 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
; GFX11-NEXT: global_store_b64 v6, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v8, v[0:1], s[0:1] offset:8 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v8, v[0:1], s[0:1] offset:8 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NOFMA-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[6:7]
; GFX11-NOFMA-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX11-NOFMA-NEXT: global_store_b64 v10, v[0:1], s[0:1]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FMA-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-FMA-NEXT: global_store_b64 v10, v[0:1], s[0:1]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NOFMA-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[6:7]
; GFX11-NOFMA-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX11-NOFMA-NEXT: global_store_b64 v10, v[0:1], s[0:1]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FMA-NEXT: v_fma_f64 v[0:1], -v[2:3], v[4:5], v[0:1]
; GFX11-FMA-NEXT: global_store_b64 v10, v[0:1], s[0:1]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fmac_f32_e32 v2, v1, v2
; GFX11-FMA-NEXT: global_store_b32 v0, v2, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fmac_f32_e32 v2, v1, v2
; GFX11-FMA-NEXT: global_store_b32 v0, v2, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, v1, v2, -v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, v1, v2, -v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, -v1, v2, v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, -v1, v2, v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, -v1, v2, -v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, -v1, v2, -v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, v1, v2, -v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fma_f32 v1, v1, v2, -v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fmac_f32_e32 v2, v1, v2
; GFX11-FMA-NEXT: global_store_b32 v0, v2, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NOFMA-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NOFMA-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-FMA-NEXT: v_fmac_f32_e32 v2, v1, v2
; GFX11-FMA-NEXT: global_store_b32 v0, v2, s[4:5]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-NOFMA-NEXT: v_fmac_f32_e32 v2, v3, v1
; GFX11-NOFMA-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FMA-NEXT: v_fmac_f32_e32 v1, v3, v2
; GFX11-FMA-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NOFMA-NEXT: s_waitcnt vmcnt(0)
; GFX11-NOFMA-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
; GFX11-NOFMA-NEXT: global_store_b64 v8, v[0:1], s[0:1]
+; GFX11-NOFMA-NEXT: s_nop 0
; GFX11-NOFMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NOFMA-NEXT: s_endpgm
;
; GFX11-FMA-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FMA-NEXT: v_fma_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; GFX11-FMA-NEXT: global_store_b64 v6, v[0:1], s[0:1]
+; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
ptr addrspace(1) %in1,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_fmac_f32_e32 v2, 2.0, v1
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_fmac_f32_e32 v2, -2.0, v1
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_fma_f32 v1, v9, -v5, -v1
; GFX11-NEXT: v_fma_f32 v0, v8, -v4, -v0
; GFX11-NEXT: global_store_b128 v12, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f32 v0, v0, v1, v2
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f32 v0, v2, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f16 v0, v0, v1, v2
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f16 v0, v2, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_maxmin_f32 v1, v1, 4.0, 2.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, -v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, -v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, -v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, -v1, |v2|, -|v3|
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, -|v1|, -|v2|, -|v3|
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_maxmin_f32 v3, v1, v2, v3
; GFX11-NEXT: v_minmax_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_minmax_f32 v1, -v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_maxmin_f32 v1, v1, v2, v3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f16 v1, v1, 2.0, 4.0
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_med3_f16 v1, v1, v2, v3
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_maxmin_f32 v1, v1, s2, 0x41800000
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1]
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_min3_f32 v0, v0, v1, v2
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_min3_f32 v0, v2, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile float, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_min3_f16 v0, v0, v1, v2
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_min3_f16 v0, v2, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile half, ptr addrspace(1) %aptr, align 2
; GFX11-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5]
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile double, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: v_max_f64 v[2:3], v[4:5], v[4:5]
; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load volatile double, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: s_mov_b32 s9, s5
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: v_mul_f16_e32 v0, 0x4200, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: v_mul_f16_e32 v0, 4.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_mul_f16 v0, 0x44004200, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_mul_f16 v0, 0x42004400, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_pk_mul_f16 v1, v3, v1
; GFX11-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_pk_mul_f16 v1, 0x44004200, v1
; GFX11-NEXT: v_pk_mul_f16 v0, 0x40004800, v0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_rndne_f16_e32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%1 = call half @llvm.nearbyint.f16(half %in)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_rndne_f32_e32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_rndne_f32_e32 v1, s3
; GFX11-NEXT: v_rndne_f32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_rndne_f32_e32 v1, s5
; GFX11-NEXT: v_rndne_f32_e32 v0, s4
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_rndne_f64_e32 v[0:1], s[2:3]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_rndne_f64_e32 v[2:3], s[6:7]
; GFX11-NEXT: v_rndne_f64_e32 v[0:1], s[4:5]
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_sub_f16_e64 v1, s3, |s2|
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call half @llvm.fabs.f16(half %x)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mul_f16_e64 v1, s3, -|s2|
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call half @llvm.fabs.f16(half %x)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%bc = bitcast i16 %in to half
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call half @llvm.fabs.f16(half %in)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %in, align 2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd <2 x half> %in, <half 1.0, half 2.0>
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_pk_mul_f16 v1, s2, -4.0 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
; GFX11-NEXT: v_cndmask_b32_e64 v1, s2, -v0, vcc_lo
; GFX11-NEXT: v_mov_b32_e32 v0, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = select i1 %z, double %x, double %y
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fneg = fsub half -0.0, %in
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%bc = bitcast i16 %in to half
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mul_f16_e64 v1, -v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %in
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%in = call i32 asm sideeffect "; def $0", "=s"()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%bc = bitcast i32 %in to <2 x half>
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_mul_f16 v1, v1, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b16 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_d16_hi_b16 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = tail call float @llvm.fabs.f32(float %x) #1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = tail call float @llvm.fabs.f32(float %x) #1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = tail call float @llvm.fabs.f32(float %x) #1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x.fabs = tail call float @llvm.fabs.f32(float %x) #3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
; GFX11-NEXT: s_and_b32 s2, s2, s3
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp uno float %x, 0.000000e+00
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, %y
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fabs = tail call half @llvm.fabs.f16(half %x) #1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord half %x, 0.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord half %x, 0.0
; GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT: s_waitcnt vmcnt(0)
; G_GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT: s_waitcnt vmcnt(0)
; G_GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: global_store_b32 v1, v0, s[6:7]
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: v_mov_b32_e32 v1, 0
; G_GFX1100-NEXT: s_waitcnt vmcnt(0)
; G_GFX1100-NEXT: global_store_b32 v1, v0, s[6:7]
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT: s_waitcnt vmcnt(0)
; G_GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT: s_waitcnt vmcnt(0)
; G_GFX1100-NEXT: global_store_b32 v[0:1], v0, off
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-NEXT: s_waitcnt vmcnt(0)
; GFX1100-NEXT: global_store_b32 v1, v0, s[6:7]
+; GFX1100-NEXT: s_nop 0
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
; G_GFX1100-NEXT: v_mov_b32_e32 v1, 0
; G_GFX1100-NEXT: s_waitcnt vmcnt(0)
; G_GFX1100-NEXT: global_store_b32 v1, v0, s[6:7]
+; G_GFX1100-NEXT: s_nop 0
; G_GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; G_GFX1100-NEXT: s_endpgm
main_body:
; GFX11-LABEL: global_atomic_fmin_f32_noret:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_min_f32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
; GFX11-LABEL: global_atomic_fmax_f32_noret:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_max_f32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
; GFX11-NEXT: global_atomic_max_f32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[3:4], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
; GFX11-NEXT: global_atomic_min_f32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[3:4], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %in, align 2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, s2
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0|
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2|
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%result = fptrunc double %in to float
; GFX11-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0
; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-SAFE-SDAG-NEXT: s_nop 0
; GFX11-SAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SAFE-SDAG-NEXT: s_endpgm
;
; GFX11-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-SAFE-GISEL-NEXT: s_nop 0
; GFX11-SAFE-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SAFE-GISEL-NEXT: s_endpgm
;
; GFX11-UNSAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-UNSAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-SDAG-NEXT: s_nop 0
; GFX11-UNSAFE-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-UNSAFE-SDAG-NEXT: s_endpgm
;
; GFX11-UNSAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-UNSAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-GISEL-NEXT: s_nop 0
; GFX11-UNSAFE-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-UNSAFE-GISEL-NEXT: s_endpgm
%result = fptrunc double %in to half
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; GFX11-SDAG-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; GFX11-GISEL-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%result = fptrunc <2 x double> %in to <2 x float>
; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
; GFX11-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[8:9]
; GFX11-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%result = fptrunc <3 x double> %in to <3 x float>
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[8:9]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[10:11]
; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%result = fptrunc <4 x double> %in to <4 x float>
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-GISEL-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%result = fptrunc <8 x double> %in to <8 x float>
; GFX11-NEXT: v_trunc_f16_e32 v3, v3
; GFX11-NEXT: v_fma_f16 v1, -v3, v2, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f16_e32 v3, v3
; GFX11-NEXT: v_fma_f16 v1, -v3, v2, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f16_e32 v3, v3
; GFX11-NEXT: v_fma_f16 v1, -v3, v2, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #1 {
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
; GFX11-NEXT: v_fma_f32 v1, -v3, v2, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
; GFX11-NEXT: v_fma_f32 v1, -v3, v2, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
; GFX11-NEXT: v_fma_f32 v1, -v3, v2, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #1 {
; GFX11-NEXT: v_trunc_f64_e32 v[4:5], v[4:5]
; GFX11-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
; GFX11-NEXT: global_store_b64 v12, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f64_e32 v[4:5], v[4:5]
; GFX11-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
; GFX11-NEXT: global_store_b64 v10, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f64_e32 v[4:5], v[4:5]
; GFX11-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
; GFX11-NEXT: global_store_b64 v10, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #1 {
; GFX11-NEXT: v_fma_f16 v1, -v4, v2, v1
; GFX11-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_fma_f16 v0, -v5, v2, v0
; GFX11-NEXT: v_pack_b32_f16 v0, v3, v0
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
; GFX11-NEXT: v_fma_f32 v0, -v3, v2, v0
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f32_e32 v5, v5
; GFX11-NEXT: v_fma_f32 v0, -v5, v4, v0
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: v_trunc_f64_e32 v[6:7], v[6:7]
; GFX11-NEXT: v_fma_f64 v[0:1], -v[6:7], v[4:5], v[0:1]
; GFX11-NEXT: global_store_b128 v16, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %in2) #0 {
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_alignbit_b32 v0, s1, v0, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_alignbit_b32 v1, s2, s3, 25
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, v0, s3
; GFX11-NEXT: v_alignbit_b32 v0, s4, v3, s2
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, s7, 23
; GFX11-NEXT: v_alignbit_b32 v0, s4, s6, 25
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, v5, s9
; GFX11-NEXT: v_alignbit_b32 v0, s4, v6, s8
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, s9, 25
; GFX11-NEXT: v_alignbit_b32 v0, s4, s8, 31
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%shl = shl i32 %a, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_alignbit_b32 v0, s2, s3, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_alignbit_b32 v1, s2, s3, 7
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, s7, v0
; GFX11-NEXT: v_alignbit_b32 v0, s4, s6, v2
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, s7, 9
; GFX11-NEXT: v_alignbit_b32 v0, s4, s6, 7
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, s9, v4
; GFX11-NEXT: v_alignbit_b32 v0, s4, s8, v5
; GFX11-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_alignbit_b32 v1, s5, s9, 7
; GFX11-NEXT: v_alignbit_b32 v0, s4, s8, 1
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_mov_b32 s9, s5
; GFX11-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: v_sub_f16_e32 v0, 1.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: v_add_f16_e32 v0, -2.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v0, 0x40003c00, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v0, 0xbc00c000, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst
; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("one-as") seq_cst
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_atomic_add_f32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load ptr, ptr addrspace(4) %arg
; GFX11-LABEL: global_inc_saddr_i32_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_inc_saddr_i32_nortn_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_inc_saddr_i64_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_inc_saddr_i64_nortn_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_dec_saddr_i32_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_dec_saddr_i32_nortn_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_dec_saddr_i64_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_dec_saddr_i64_nortn_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%voffset = load i32, ptr addrspace(1) %voffset.ptr
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:2047
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%voffset = load i32, ptr addrspace(1) %voffset.ptr
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:-2048
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%voffset = load i32, ptr addrspace(1) %voffset.ptr
; GFX11-NEXT: v_readfirstlane_b32 s0, v2
; GFX11-NEXT: v_readfirstlane_b32 s1, v3
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
; GFX11-NEXT: v_readfirstlane_b32 s0, v2
; GFX11-NEXT: v_readfirstlane_b32 s1, v3
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1] offset:-120
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
store half %arg, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
store <2 x half> %arg, ptr addrspace(1) %out
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] offset:4
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
store <3 x half> %arg, ptr addrspace(1) %out
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
store <4 x half> %arg, ptr addrspace(1) %out
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
store <8 x half> %arg, ptr addrspace(1) %out
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, s2
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fpext = fpext <2 x half> %in to <2 x float>
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext half %arg to float
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, s2
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <2 x half> %arg to <2 x float>
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s4
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s3
; GFX11-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <3 x half> %arg to <3 x float>
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s5
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s3
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <4 x half> %arg to <4 x float>
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <8 x half> %arg to <8 x float>
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext half %arg to double
; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v0
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v1
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <2 x half> %arg to <2 x double>
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v6, v[4:5], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <3 x half> %arg to <3 x double>
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <4 x half> %arg to <4 x double>
; GFX11-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:32
; GFX11-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v16, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext = fpext <8 x half> %arg to <8 x double>
; GFX11-NEXT: global_load_u16 v1, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %in
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x half>, ptr addrspace(1) %in
; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <4 x half>, ptr addrspace(1) %in
; GFX11-NEXT: global_load_b128 v[0:3], v4, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <8 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v4
; GFX11-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <3 x half>, ptr addrspace(1) %in
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v5
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <4 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v12, v[4:7], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <8 x half>, ptr addrspace(1) %in
; GFX11-NEXT: global_store_b128 v20, v[0:3], s[0:1] offset:32
; GFX11-NEXT: global_store_b128 v20, v[12:15], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v20, v[8:11], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <16 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v6, v[4:5], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <3 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <4 x half>, ptr addrspace(1) %in
; GFX11-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:32
; GFX11-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v16, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <8 x half>, ptr addrspace(1) %in
; GFX11-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:32
; GFX11-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v32, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <16 x half>, ptr addrspace(1) %in
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <2 x float>, ptr addrspace(1) %in
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v3, v2, s[0:1] offset:4
; GFX11-NEXT: global_store_b32 v3, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <3 x float>, ptr addrspace(1) %in
; GFX11-NEXT: v_pack_b32_f16 v1, v2, v3
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v5
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <4 x float>, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_pack_b32_f16 v0, v4, v5
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <8 x float>, ptr addrspace(1) %in
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:16
; GFX11-NEXT: global_store_b128 v16, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load <16 x float>, ptr addrspace(1) %in
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_add_f16_e64 v1, s2, s3
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd half %a, %b
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v1, s2, s3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd <2 x half> %a, %b
; GFX11-NEXT: v_pk_add_f16 v1, v1, v3
; GFX11-NEXT: v_pk_add_f16 v0, v0, v2
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%b_ptr = getelementptr <4 x half>, ptr addrspace(1) %in, i32 1
; GFX11-NEXT: v_pk_add_f16 v1, s5, s9
; GFX11-NEXT: v_pk_add_f16 v0, s4, s8
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd <8 x half> %a, %b
; GFX11-NEXT: global_load_u16 v1, v0, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load half, ptr addrspace(1) %in
; GFX11-NEXT: global_load_u16 v1, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %in
; GFX11-NEXT: global_store_b32 v0, v1, s[10:11]
; GFX11-NEXT: s_cbranch_scc0 .LBB0_1
; GFX11-NEXT: ; %bb.2: ; %bb2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: global_store_b32 v0, v1, s[10:11]
; GFX11-NEXT: s_cbranch_scc0 .LBB1_1
; GFX11-NEXT: ; %bb.2: ; %bb2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_cmpk_eq_i32 s5, 0x400
; GFX11-NEXT: s_cbranch_scc0 .LBB2_1
; GFX11-NEXT: ; %bb.2: ; %bb2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
; GFX11-NEXT: s_cbranch_scc0 .LBB3_1
; GFX11-NEXT: ; %bb.2: ; %bb2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: global_store_b16 v2, v3, s[6:7]
; GFX11-NEXT: s_cbranch_vccz .LBB4_1
; GFX11-NEXT: ; %bb.2: ; %bb2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_cbranch_vccz .LBB5_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: global_store_b16 v2, v3, s[6:7]
; GFX11-NEXT: s_cbranch_vccz .LBB6_1
; GFX11-NEXT: ; %bb.2: ; %bb2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_cbranch_vccz .LBB7_1
; GFX11-NEXT: ; %bb.2: ; %bb2
; GFX11-NEXT: s_set_inst_prefetch_distance 0x2
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT: v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT: v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT: v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s1
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s0
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s1
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_pack_ll_b32_b16 s2, 0x4500, s2
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x half>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%vec = load <2 x half>, ptr addrspace(4) %vec.ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, 0xffff, s2, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, v1, s0, 0x7060302
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, 0xffff, 53, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, s2, v1, 0x5040100
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, -15, v1, 0x5040100
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, 0xffff, s2, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, 0xffff, 53, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, s2, v1, 0x5040100
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, 35, v1, 0x5040100
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_or_b32 s2, s3, s2
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%idx = load volatile i32, ptr addrspace(4) %idx.ptr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, s0, 0x3e703e7, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, v1, 0x12341234, v2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, s0, v0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v0, s0, v0, 0x5040100
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, 0xffff, s0, v1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, s0, v1, 0x5040100
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v1, 0xffff, s0, v1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_bfi_b32 v0, v2, s0, v0
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: v_bfi_b32 v1, s1, s6, v1
; GFX11-NEXT: v_bfi_b32 v0, s0, s6, v0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, s0, v1, 0x5040100
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfi_b32 v3, 0xffff, s0, v3
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_perm_b32 v0, v8, v0, 0x5040100
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[4:5] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[4:5] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[4:5] offset:16
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v1, s2, s3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v1, s2, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e32 v1, v1, v2
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v1, v1, 1.0
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e32 v1, 1.0, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v1, -v1, v2
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v1, v1, -v2
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v1, -v1, -v2
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v1, -|v1|, -v2
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: ds_add_gs_reg_rtn v[3:4], v0 offset:16 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b32 v[1:2], v3, off
+; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%res = call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %arg, i32 16)
; CHECK-NEXT: ds_add_gs_reg_rtn v[3:4], v0 offset:32 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b64 v[1:2], v[3:4], off
+; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%res = call i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32 %arg, i32 32)
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_add_nc_u32_e32 v0, v1, v0
; CHECK-NEXT: global_store_b32 v[6:7], v0, off
+; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%pair = call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data0, <4 x i32> %data1, i32 0)
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_add_nc_u32_e32 v0, v1, v0
; CHECK-NEXT: global_store_b32 v[6:7], v0, off
+; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%pair = call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data0, <4 x i32> %data1, i32 1)
; CHECK-NEXT: ds_sub_gs_reg_rtn v[3:4], v0 offset:16 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b32 v[1:2], v3, off
+; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%res = call i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32 %arg, i32 16)
; CHECK-NEXT: ds_sub_gs_reg_rtn v[3:4], v0 offset:32 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b64 v[1:2], v[3:4], off
+; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%res = call i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32 %arg, i32 32)
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: v_cmp_eq_f16_e64 s2, s2, |s3|
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_cmp_eq_f16_e64 s2, s2, |s3|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: v_cmp_eq_f16_e64 s2, |s2|, |s3|
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_cmp_eq_f16_e64 s2, |s2|, |s3|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dot2_bf16_bf16 v1, s2, s3, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dot2_f16_f16 v1, s2, s3, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dot2_f32_bf16 v0, s2, s3, v0 clamp
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dot2_f32_bf16 v0, s2, s3, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_and_b32 s2, s2, s3
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
;
; GFX11-LABEL: store_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2dmsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2darraymsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_V1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_V2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_glc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_slc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_glc_slc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x80,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x84,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x88,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x8c,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x90,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x94,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_2dmsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x98,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_2darraymsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x9c,0x0f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_mip_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x80,0x0f,0x1d,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_mip_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x84,0x0f,0x1d,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_mip_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x88,0x0f,0x1d,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_mip_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x8c,0x0f,0x1d,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_mip_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x90,0x0f,0x1d,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_mip_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x94,0x0f,0x1d,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_1d_V1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x80,0x02,0x19,0xf0,0x01,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_1d_V2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x80,0x0c,0x19,0xf0,0x02,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_1d_glc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16 ; encoding: [0x80,0x4f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_1d_slc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16 ; encoding: [0x80,0x1f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_1d_glc_slc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16 ; encoding: [0x80,0x5f,0x19,0xf0,0x04,0x00,0x00,0x00]
+; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
main_body:
; GFX11-LABEL: store_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2dmsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_2darraymsaa:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_mip_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_V1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_V2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_glc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_slc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_1d_glc_slc:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: image_load v[0:3], v4, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: image_store v[0:3], v4, s[16:23] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_f16_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v2f16_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v3f16_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v4f16_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_f16_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v2f16_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v3f16_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v4f16_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_f16_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v2f16_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v3f16_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v4f16_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_f32_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v2f32_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v3f32_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v4f32_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_f32_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v2f32_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v3f32_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v4f32_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_f32_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v2f32_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v3f32_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: store_v4f32_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_mov_b32 s0, -1
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_movk_i32 s0, 0x1000
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: .LBB5_2: ; %endif
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: ; %bb.2: ; %endif
; SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
; SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: v_mov_b32_e32 v3, s2
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: v_mov_b32_e32 v3, s2
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: v_mov_b32_e32 v3, s2
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: v_mov_b32_e32 v1, s2
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: s_waitcnt_depctr 0xfffe
; SDAG-NEXT: v_mov_b32_e32 v3, s0
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; SDAG-NEXT: v_mov_b32_e32 v3, s2
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GISEL-NEXT: v_mov_b32_e32 v3, s1
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
;
; SDAG-NEXT: ; %bb.2: ; %endif
; SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f16_e32 v0, v1, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f16_e32 v0, 2.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_ldexp_f16_e64 v0, v0, 2
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s7, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlane16_b32 v0, v0, 1, 2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 1, i32 2, i1 false, i1 false)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s2, 0xc1d1
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 false, i1 false)
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_permlane16_b32 v1, v1, s3, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: v_permlane16_b32 v0, v0, s3, s4
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_permlane16_b32 v1, v1, s2, s3
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_permlane16_b32 v0, v0, s4, s3
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX11-SDAG-NEXT: v_permlane16_b32 v1, v1, s3, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX11-GISEL-NEXT: v_permlane16_b32 v0, v0, s3, s4
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidy = call i32 @llvm.amdgcn.workitem.id.y()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s7, s0 op_sel:[1,0]
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 false)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s7, s0 op_sel:[0,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s7, s0 op_sel:[1,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s7, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, 1, 2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 1, i32 2, i1 false, i1 false)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s2, 0xc1d1
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 false, i1 false)
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_permlanex16_b32 v1, v1, s3, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: v_permlanex16_b32 v0, v0, s3, s4
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_permlanex16_b32 v1, v1, s2, s3
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_permlanex16_b32 v0, v0, s4, s3
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX11-SDAG-NEXT: v_permlanex16_b32 v1, v1, s3, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX11-GISEL-NEXT: v_permlanex16_b32 v0, v0, s3, s4
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidy = call i32 @llvm.amdgcn.workitem.id.y()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s7, s0 op_sel:[1,0]
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 false)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s7, s0 op_sel:[0,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s7, s0 op_sel:[1,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s2, s3
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s2, s3
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_permlane16_b32 v1, v0, s2, s3
; GFX11-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_permlane16_b32 v1, v0, s2, s3
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s2, s3 op_sel:[1,0]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s2, s3 op_sel:[0,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlane16_b32 v0, v0, s2, s3 op_sel:[1,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s2, s3
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s2, s3
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_permlanex16_b32 v1, v0, s2, s3
; GFX11-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_permlanex16_b32 v1, v0, s2, s3
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s2, s3 op_sel:[1,0]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s2, s3 op_sel:[0,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_permlanex16_b32 v0, v0, s2, s3 op_sel:[1,1]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_permlane64_b32 v0, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane64(i32 %src0)
; GFX11-NEXT: v_permlane64_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.permlane64(i32 99)
; GFX11-SDAG-NEXT: v_permlane64_b32 v0, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 glc
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 slc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_immoffs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_ofs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b128 v[0:3], v4, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: buffer_load_b128 v[0:3], v5, s[0:3], 0 offen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[0:3], v6, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
; GFX11-NEXT: buffer_store_b64 v[5:6], v0, s[0:3], 0 offen offset:28
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a1 = add i32 %a, 4
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
; GFX11-NEXT: buffer_store_b64 v[5:6], v0, s[0:3], 0 offen offset:28
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = shl i32 %inp, 6
; GFX11-NEXT: buffer_store_b64 v[1:2], v0, s[0:3], 0 offen offset:4
; GFX11-NEXT: buffer_store_b64 v[3:4], v0, s[0:3], 0 offen offset:12 glc
; GFX11-NEXT: buffer_store_b64 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a1 = add i32 %a, 4
; GFX11-LABEL: buffer_store_x2_offen_merged_and:
; GFX11: ; %bb.0:
; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a1 = add i32 %a, 4
; GFX11: ; %bb.0:
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX11-NEXT: buffer_store_b128 v[1:4], v0, s[0:3], 0 offen offset:4
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = shl i32 %inp, 4
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:4
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 offset:28
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
; GFX11-LABEL: buffer_store_x2_offset_merged:
; GFX11: ; %bb.0:
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:4
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 glc
; GFX11-NEXT: buffer_store_b32 v6, off, s[0:3], 0 slc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: raw_buffer_store_f16:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_v2f16:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_v4f16:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: raw_buffer_store_i16:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_v2i16:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_v4i16:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 offset:4
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 offset:28
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 offset:28
; GFX11-NEXT: buffer_store_b32 v5, off, s[0:3], 0 offset:32
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 8)
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-NEXT: tbuffer_store_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_UINT] glc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], off, s[0:3], 0 format:78 slc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], off, s[0:3], 0 format:78 glc dlc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: tbuffer_store_immoffs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:117 offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: tbuffer_store_scalar_and_imm_offs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], s4 format:117 offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_ofs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:115 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_x v0, off, s[0:3], 0 format:125
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xy v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_voffset_large_12bit:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0x1000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0xf000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0x7ff000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0xfff000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-NEXT: tbuffer_store_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_UINT] glc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], off, s[0:3], 0 format:78 slc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], off, s[0:3], 0 format:78 glc dlc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: tbuffer_store_immoffs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:117 offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: tbuffer_store_scalar_and_imm_offs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], s4 format:117 offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_ofs:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:115 offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_x v0, off, s[0:3], 0 format:125
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xy v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_voffset_large_12bit:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0x1000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0xf000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0x7ff000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0xfff000
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 128)
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 129)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 130)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 131)
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 132)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 133)
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 99999)
; GFX11-NEXT: buffer_store_b128 v[0:3], v12, s[0:3], 0 idxen
; GFX11-NEXT: buffer_store_b128 v[4:7], v12, s[0:3], 0 idxen glc
; GFX11-NEXT: buffer_store_b128 v[8:11], v12, s[0:3], 0 idxen slc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: buffer_store_b128 v[0:3], v4, s[0:3], 0 idxen offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_idx:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b128 v[0:3], v4, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: buffer_store_b128 v[0:3], v[4:5], s[0:3], 0 idxen offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_both:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b128 v[0:3], v[4:5], s[0:3], 0 idxen offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: buffer_store_b128 v[0:3], v[5:6], s[0:3], 0 idxen offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: buffer_load_b128 v[0:3], v5, s[0:3], 0 idxen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[0:3], v6, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: buffer_store_b128 v[0:3], v7, s[0:3], 0 idxen
; GFX11-NEXT: buffer_store_b64 v[4:5], v7, s[0:3], 0 idxen glc
; GFX11-NEXT: buffer_store_b32 v6, v7, s[0:3], 0 idxen slc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b8 v0, v1, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, v1, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%v2 = fptrunc float %v1 to half
; GFX11-LABEL: struct_buffer_store_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
; GFX11-LABEL: struct_buffer_store_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, v1, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: struct_buffer_store_vif16:
; GFX11: ; %bb.0:
; GFX11-NEXT: buffer_store_b32 v0, v1, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.struct.buffer.store.v2i16(<2 x i16> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
; GFX11-LABEL: struct_buffer_store_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: buffer_store_b64 v[0:1], v2, s[0:3], 0 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.struct.buffer.store.v4i16(<4 x i16> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: v_mov_b32_e32 v2, s6
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: v_mov_b32_e32 v2, s6
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-NEXT: tbuffer_store_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_FMT_32_32_32_32_UINT] idxen glc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], v12, s[0:3], 0 format:78 idxen slc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], v12, s[0:3], 0 format:78 idxen glc dlc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:117 idxen offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], s4 format:117 idxen offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_idx:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_8_8_8_8_SINT] idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:115 idxen offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_both:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:70 idxen offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], 0 format:[BUF_FMT_8_8_8_8_UINT] idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_x v0, v1, s[0:3], 0 format:125 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xy v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0x1000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0xf000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0x7ff000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0xfff000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s4
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: v_mov_b32_e32 v2, s6
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s5
; GFX11-PACKED-NEXT: v_mov_b32_e32 v2, s6
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-PACKED-NEXT: s_nop 0
; GFX11-PACKED-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-PACKED-NEXT: s_endpgm
main_body:
; GFX11-NEXT: tbuffer_store_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_FMT_32_32_32_32_UINT] idxen glc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], v12, s[0:3], 0 format:78 idxen slc
; GFX11-NEXT: tbuffer_store_format_xyzw v[8:11], v12, s[0:3], 0 format:78 idxen glc dlc
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:117 idxen offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], s4 format:117 idxen offset:42
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_idx:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_8_8_8_8_SINT] idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:115 idxen offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_both:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:70 idxen offen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], 0 format:[BUF_FMT_8_8_8_8_UINT] idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_x v0, v1, s[0:3], 0 format:125 idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-LABEL: buffer_store_x2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: tbuffer_store_format_xy v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0x1000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0xf000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0x7ff000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v5, 0xfff000 :: v_dual_mov_b32 v4, s4
; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[24:25], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[24:25], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[16:17], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[12:13], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1]
; W64-NEXT: global_store_b128 v[20:21], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0]
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0]
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64: ; %bb.0: ; %bb
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_ceil_f16_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_ceil_f16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cos_f16_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a.val = load half, ptr addrspace(1) %a
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_pack_b32_f16 v1, v1, v2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a.val = load <2 x half>, ptr addrspace(1) %a
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_floor_f16_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_floor_f16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11CHECK-NEXT: s_nop 0
; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11CHECK-NEXT: s_endpgm
%result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11CHECK-NEXT: s_nop 0
; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11CHECK-NEXT: s_endpgm
%result = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11CHECK-NEXT: s_nop 0
; GFX11CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11CHECK-NEXT: s_endpgm
%result = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s3
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5
; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v6
; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15
; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15
; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s3
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5
; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v6
; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15
; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15
; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v1, v0 :: v_dual_sub_f32 v0, v3, v2
; GFX1100-SDAG-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: v_dual_sub_f32 v2, v2, v0 :: v_dual_sub_f32 v1, v4, v1
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v5, v3
; GFX1100-SDAG-NEXT: global_store_b96 v6, v[0:2], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5
; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v6, v4 :: v_dual_sub_f32 v0, v7, v5
; GFX1100-SDAG-NEXT: global_store_b128 v9, v[0:3], s[0:1]
+; GFX1100-SDAG-NEXT: s_nop 0
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: v_dual_sub_f32 v3, v3, v7
; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX1100-GISEL-NEXT: s_nop 0
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f16_e32 v0, 0x4200, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f16_e32 v0, 4.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, 0x44004200, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, 0x42004400, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 offset:4
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_pk_max_f16 v0, v3, v2
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_pk_max_f16 v1, 0x44004200, v0
; GFX11-NEXT: v_pk_max_f16 v0, 0x40004800, v2
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_max_f16_e32 v1, v1, v1
; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: v_min_f16_e32 v0, 0x4200, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: v_min_f16_e32 v0, 4.0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_min_f16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_min_f16 v0, 0x44004200, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_min_f16 v0, 0x42004400, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 offset:4
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_pk_min_f16 v1, v1, v0
; GFX11-NEXT: v_pk_min_f16 v0, v3, v2
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_pk_min_f16 v1, 0x44004200, v0
; GFX11-NEXT: v_pk_min_f16 v0, 0x40004800, v2
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
bb:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_rndne_f16_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_rndne_f16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_add_f32 v1, v0, v1 :: v_dual_add_f32 v0, v2, v3
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e32 v0, v6, v8
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f16_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sin_f16_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a.val = load half, ptr addrspace(1) %a
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_pack_b32_f16 v1, v1, v2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a.val = load <2 x half>, ptr addrspace(1) %a
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_trunc_f16_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_trunc_f16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_lshrrev_b16 v1, s3, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = lshr <2 x i16> %lhs, %rhs
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshrrev_b16 v0, v1, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshrrev_b16 v1, s0, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshrrev_b16 v1, v1, s0
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshrrev_b16 v1, v1, 8 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_pk_lshrrev_b16 v1, v3, v1
; GFX11-NEXT: v_pk_lshrrev_b16 v0, v2, v0
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mad_u16 v0, v1, v2, v0
; GFX11-NEXT: global_store_b16 v3, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ext0 = zext i32 %arg0 to i64
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v0, v0, s[0:1] slc dlc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v0, v0, s[0:1] slc dlc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] glc slc dlc
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] glc slc dlc
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] glc slc dlc
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] glc slc dlc
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc dlc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc dlc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] dlc
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] dlc
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] dlc
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] dlc
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: ds_load_b32 v0, v0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: ds_load_b32 v0, v0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: ds_load_b32 v0, v0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: ds_load_b32 v0, v0
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: scratch_load_b32 v0, off, s2 slc dlc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: scratch_load_b32 v0, off, s2 slc dlc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: scratch_load_b32 v0, v0, off slc dlc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: scratch_load_b32 v0, v0, off slc dlc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: scratch_load_b32 v0, off, s2 glc dlc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: scratch_load_b32 v0, off, s2 glc dlc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
; GFX11-WGP-NEXT: scratch_load_b32 v0, v0, off glc dlc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-WGP-NEXT: s_nop 0
; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-NEXT: scratch_load_b32 v0, v0, off glc dlc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-CU-NEXT: s_nop 0
; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-CU-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT: s_mov_b32 s4, s3
; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
;
; GISEL-NEXT: s_mov_b32 s7, s4
; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
%smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT: s_mov_b32 s4, s3
; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
;
; GISEL-NEXT: s_mov_b32 s7, s4
; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
%smax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
; SDAG-NEXT: s_mov_b32 s4, s3
; SDAG-NEXT: v_maxmin_f32 v0, s0, s1, v0
; SDAG-NEXT: global_store_b32 v1, v0, s[4:5]
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
;
; GISEL-NEXT: s_mov_b32 s7, s4
; GISEL-NEXT: v_maxmin_f32 v0, s0, s1, v0
; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
%smax = call float @llvm.maxnum.f32(float %a, float %b)
; SDAG-NEXT: s_mov_b32 s4, s3
; SDAG-NEXT: v_maxmin_f16 v0, s0, s1, v0
; SDAG-NEXT: global_store_b16 v1, v0, s[4:5]
+; SDAG-NEXT: s_nop 0
; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT: s_endpgm
;
; GISEL-NEXT: s_mov_b32 s7, s4
; GISEL-NEXT: v_maxmin_f16 v0, s0, s1, v0
; GISEL-NEXT: global_store_b16 v1, v0, s[6:7]
+; GISEL-NEXT: s_nop 0
; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT: s_endpgm
%smax = call half @llvm.maxnum.f16(half %a, half %b)
; GFX11-NEXT: v_mul_lo_u32 v1, v1, v3
; GFX11-NEXT: v_mul_lo_u32 v0, v0, v2
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mul_lo_u32 v1, v1, v5
; GFX11-NEXT: v_mul_lo_u32 v0, v0, v4
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mul_hi_i32 v1, 0x50, v0
; GFX11-NEXT: v_mul_lo_u32 v0, 0x50, v0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mul_hi_i32 v1, v0, 9
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 9
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_mov_b32 s0, s4
; GFX11-NEXT: s_mov_b32 s1, s5
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: v_add_nc_u32_e32 v1, v4, v1
; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v3
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, v7, v0, vcc_lo
; GFX11-NEXT: global_store_b128 v16, v[8:11], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 1
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 2047
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -2048
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 16383
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936639
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936640
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938687
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938688
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942783
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942784
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773761
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773760
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771713
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771712
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767617
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767616
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd double %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 div:2
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 div:2
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd nsz double %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:2
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_med3_f32 v0, v0, v1, v2 mul:2
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%fmed3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:2
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd nsz double %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:4
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd nsz double %a, 1.0
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f32_e64 v0, v0, 1.0 clamp div:2
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, 0.5
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f32_e64 v0, v0, v0 clamp
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, %a
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%max = call float @llvm.maxnum.f32(float %a, float 0.0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e64 v0, |v0|, |v0|
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e64 v0, |v0|, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e64 v0, v0, |v0|
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%x = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, 0.5, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], 0.5
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd double %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd float %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd double %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e32 v0, 0.5, v0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd half %a, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f16_e32 v0, v0, v0
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd half %a, 1.0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f16_e64 v0, v0, 1.0 div:2
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = fadd half %a, 1.0
; GFX11-NEXT: v_add_f32_e64 v1, v1, v0 mul:2
; GFX11-NEXT: v_mul_f32_e32 v0, v1, v0
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%mul = fmul float %a, %a
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_store_b64 v16, v[0:1], s[34:35]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s35, 0, s0
; GFX11-NEXT: global_store_b64 v[0:1], v[3:4], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add3_u32 v0, v3, v1, v0
; GFX11-NEXT: global_store_b32 v6, v0, s[34:35]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v4, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v5, v1, vcc_lo
; GFX11-NEXT: global_store_b64 v8, v[0:1], s[34:35]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add3_u32 v0, v2, v0, v3
; GFX11-NEXT: global_store_b32 v6, v0, s[34:35]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_store_b64 v12, v[0:1], s[36:37]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %buffer2) {
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: global_store_b64 v16, v[0:1], s[34:35]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[34:35]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_add_co_u32 v0, s0, s2, v0
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v1, v2, s[0:1]
; GFX11-NEXT: global_store_b8 v1, v0, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load i32, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v6, v[4:5], s[4:5]
; GFX11-NEXT: global_store_b8 v6, v0, s[6:7]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load i64, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b64 v5, v[3:4], s[0:1]
; GFX11-NEXT: global_store_b64 v5, v[0:1], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load <2 x i32>, ptr addrspace(1) %aptr, align 4
; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
; GFX11-NEXT: buffer_store_b16 v0, off, s[12:15], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, 0.5, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0.5, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cmp_lt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_dual_cndmask_b32 v1, v4, v7 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[12:15], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_mov_b32 s4, s0
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = shl <2 x i16> %lhs, %rhs
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshlrev_b16 v0, v1, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshlrev_b16 v1, s0, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshlrev_b16 v1, v1, s0
; GFX11-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshlrev_b16 v1, v1, 8 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_pk_lshlrev_b16 v1, v3, v1
; GFX11-NEXT: v_pk_lshlrev_b16 v0, v2, v0
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_sub_nc_u32_e32 v1, 64, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0x41, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v1, 16, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_sub_nc_u32_e32 v1, -16, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v1, 17, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0xffffffef, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_sub_nc_u16 v1, v1, 64
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b16 v0, v2, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, v1, 64 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x400007
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x7b0040
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, v1, 7
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0xc400 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x4400 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 0x3c00 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 0xbc00 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 0xc000 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 0x4000 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = sitofp i64 %in to half
; GFX11-NEXT: v_ldexp_f32 v1, v1, v2
; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_ldexp_f32 v0, v0, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = sitofp i64 %in to float
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v1, v1, v2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_ldexp_f32 v1, v0, s2
; GFX11-NEXT: v_ldexp_f32 v0, v2, s3
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = sitofp <2 x i64> %in to <2 x float>
; GFX11-NEXT: v_ldexp_f32 v1, v6, v11
; GFX11-NEXT: v_ldexp_f32 v0, v4, v5
; GFX11-NEXT: global_store_b128 v7, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = sitofp <2 x i64> %in to <2 x half>
; GFX11-NEXT: v_pack_b32_f16 v1, v1, v3
; GFX11-NEXT: v_pack_b32_f16 v0, v4, v2
; GFX11-NEXT: global_store_b64 v5, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f16_i16_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cvt_f16_i16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load float, ptr addrspace(1) %in0
; GFX11-NEXT: s_cbranch_scc0 .LBB8_2
; GFX11-NEXT: ; %bb.1: ; %exit
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB8_2: ; %bb
; GFX11-NEXT: v_mov_b32_e64 v9, -2
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB8_4:
; GFX11-NEXT: v_mov_b32_e32 v0, 8
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB10_4:
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB11_5: ; %end
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB11_6:
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB13_6:
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: v_pk_sub_i16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v0, s2, s0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load <2 x i16>, ptr addrspace(4) %in0
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load <2 x i16>, ptr addrspace(4) %in0
; GFX11-NEXT: s_mov_b32 s4, s0
; GFX11-NEXT: s_mov_b32 s5, s1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%add = sub <2 x i16> %a, %b
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0x1c8007b
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0xfc21fcb3
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_sub_i16 v0, v0, 32
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0x3f80 op_sel:[0,1] op_sel_hi:[1,0]
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_alignbit_b32 v2, 0, v0, 16
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_ashrrev_i32_e32 v1, 16, v0
; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = uitofp i64 %in to half
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_ldexp_f32 v0, v0, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = uitofp i64 %in to float
; GFX11-NEXT: v_cvt_f32_u32_e32 v1, v1
; GFX11-NEXT: v_ldexp_f32 v1, v1, v2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: v_ldexp_f32 v1, v0, s2
; GFX11-NEXT: v_ldexp_f32 v0, v2, s3
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = uitofp <2 x i64> %in to <2 x float>
; GFX11-NEXT: v_ldexp_f32 v1, v6, v11
; GFX11-NEXT: v_ldexp_f32 v0, v4, v5
; GFX11-NEXT: global_store_b128 v7, v[0:3], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = uitofp <2 x i64> %in to <2 x half>
; GFX11-NEXT: v_pack_b32_f16 v1, v1, v3
; GFX11-NEXT: v_pack_b32_f16 v0, v4, v2
; GFX11-NEXT: global_store_b64 v5, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_cvt_f16_u16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%a = load float, ptr addrspace(1) %in0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f16_e32 v0, 0x4900, v0
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
; GFX11-NEXT: v_add_f16_e32 v0, 0x4900, v0
; GFX11-NEXT: buffer_store_b16 v1, off, s[12:15], 0
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r0,
; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SDAG-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; SDAG-GFX11-NEXT: global_store_b32 v2, v0, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s3
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
%src0 = trunc i32 %src0ext to i16
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
; SDAG-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; SDAG-GFX11-NEXT: s_nop 0
; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-GFX11-NEXT: s_endpgm
;
; GISEL-GFX11-NEXT: s_pack_ll_b32_b16 s2, s3, s2
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_nop 0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
%src.max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src, <2 x i16> <i16 0, i16 0>)
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v3, v4 op_sel:[1,0,0]
; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v2 op_sel:[1,0,0]
; GFX11-NEXT: global_store_b64 v6, v[0:1], s[0:1]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[2:3]
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ld8 = load <8 x i32>, ptr addrspace(4) %in, align 16
; GFX11-NEXT: s_or_b32 s0, s0, 4
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i16, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i16, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i16, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b16 v[0:1], v4, off
; GFX11-NEXT: global_store_d16_hi_b8 v[2:3], v5, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i17, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_add_f16_e64 v2, s0, 4.0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load half, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, 0x300, v2
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load <2 x i8>, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v2, 4, v2
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i1, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i16, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i1, ptr addrspace(4) %arg, align 4
; GFX11-NEXT: s_or_b32 s0, s0, 4
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i16, ptr addrspace(6) %arg, align 4
; GFX11-NEXT: s_or_b32 s0, s0, 1
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%load = load i16, ptr addrspace(1) %arg, align 4, !invariant.load !0
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[26:27], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[26:27], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[26:27], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[26:27], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[26:27], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[26:27], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[26:27], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[26:27], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[26:27], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[26:27], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[26:27], v[20:23], off offset:16
; W32-NEXT: global_store_b128 v[26:27], v[16:19], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[18:19], v[12:15], off offset:16
; W32-NEXT: global_store_b128 v[18:19], v[8:11], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W32-NEXT: s_clause 0x1
; W32-NEXT: global_store_b128 v[14:15], v[8:11], off offset:16
; W32-NEXT: global_store_b128 v[14:15], v[4:7], off
+; W32-NEXT: s_nop 0
; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W32-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_f32_16x16x16_f16 v[16:19], v[8:15], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[24:27], off
; W64-NEXT: global_store_b128 v[22:23], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_f32_16x16x16_bf16 v[16:19], v[8:15], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[24:27], off
; W64-NEXT: global_store_b128 v[22:23], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[8:15], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[24:27], off
; W64-NEXT: global_store_b128 v[22:23], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[8:15], v[8:15], v[16:19] op_sel:[0,0,1]
; W64-NEXT: global_store_b128 v[20:21], v[24:27], off
; W64-NEXT: global_store_b128 v[22:23], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[8:15], v[8:15], v[16:19]
; W64-NEXT: global_store_b128 v[20:21], v[24:27], off
; W64-NEXT: global_store_b128 v[22:23], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[8:15], v[8:15], v[16:19] op_sel:[0,0,1]
; W64-NEXT: global_store_b128 v[20:21], v[24:27], off
; W64-NEXT: global_store_b128 v[22:23], v[16:19], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11]
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11] neg_lo:[0,1,0]
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11] neg_lo:[1,0,0]
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11] neg_lo:[1,1,0]
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11] clamp
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11] neg_lo:[0,1,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11] neg_lo:[1,0,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[4:7], v[4:7], v[8:11] neg_lo:[1,1,0] clamp
; W64-NEXT: global_store_b128 v[12:13], v[16:19], off
; W64-NEXT: global_store_b128 v[14:15], v[8:11], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7]
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7] neg_lo:[0,1,0]
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7] neg_lo:[1,0,0]
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7] neg_lo:[1,1,0]
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7] clamp
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7] neg_lo:[0,1,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7] neg_lo:[1,0,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb:
; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[2:3], v[2:3], v[4:7] neg_lo:[1,1,0] clamp
; W64-NEXT: global_store_b128 v[8:9], v[12:15], off
; W64-NEXT: global_store_b128 v[10:11], v[4:7], off
+; W64-NEXT: s_nop 0
; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; W64-NEXT: s_endpgm
bb: