return false;
}
- AMDGPU::Waitcnt allZeroWaitcnt() const {
- return AMDGPU::Waitcnt::allZero(ST->hasVscnt());
- }
-
void setForceEmitWaitcnt() {
// For non-debug builds, ForceEmitWaitcnt has been initialized to false;
// For debug builds, get the debug counter info and adjust if need be
MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
- Wait = Wait.combined(allZeroWaitcnt());
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZeroExceptVsCnt());
}
// Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
// stores. In this case it can be useful to send a message to explicitly
// cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
- Wait = Wait.combined(allZeroWaitcnt());
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
// TODO: Remove this work-around, enable the assert for Bug 457939
ScoreBrackets.simplifyWaitcnt(Wait);
if (ForceEmitZeroWaitcnts)
- Wait = allZeroWaitcnt();
+ Wait = AMDGPU::Waitcnt::allZeroExceptVsCnt();
if (ForceEmitWaitcnt[VM_CNT])
Wait.VmCnt = 0;
Wait.ExpCnt = 0;
if (ForceEmitWaitcnt[LGKM_CNT])
Wait.LgkmCnt = 0;
- if (ForceEmitWaitcnt[VS_CNT])
- Wait.VsCnt = 0;
if (FlushVmCnt) {
if (ScoreBrackets.hasPendingEvent(VM_CNT))
} else if (Inst.isCall()) {
if (callWaitsOnFunctionReturn(Inst)) {
// Act as a wait on everything
- ScoreBrackets->applyWaitcnt(allZeroWaitcnt());
+ ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
} else {
// May need to wait for anything.
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
- if (ST->hasVscnt())
- BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(0);
Modified = true;
}
; GFX10-LABEL: v_add_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %a, %b
; GFX10-LABEL: v_add_v2i16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10-LABEL: v_add_v2i16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <2 x half> %b
; GFX10-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, 0xffc0, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %a, <i16 -64, i16 -64>
; GFX10-LABEL: v_add_v2i16_neg_inline_imm_lo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, 0x4ffc0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %a, <i16 -64, i16 4>
; GFX10-LABEL: v_add_v2i16_neg_inline_imm_hi:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, 0xffc00004, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %a, <i16 4, i16 -64>
; GFX10PLUS-LABEL: v_andn2_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_andn2_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-LABEL: v_andn2_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_andn2_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_andn2_v3i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -11, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-LABEL: v_andn2_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; CHECK-LABEL: icmp_v2i32_sext_to_v2i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; CHECK-LABEL: icmp_v2i32_zext_to_v2i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10PLUS-LABEL: v_ashr_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX10PLUS-LABEL: v_ashr_i8_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 7, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0
; GFX10PLUS-LABEL: v_ashr_i24_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 7, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr i32 %value, %amount
; GFX10PLUS-LABEL: v_ashr_i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr i32 %value, 31
; GFX10PLUS-LABEL: v_ashr_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v2, v0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_v2i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v3, v0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v4, v1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v5, v2
; GFX10PLUS-LABEL: v_ashr_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v4, v0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v5, v1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v6, v2
; GFX10PLUS-LABEL: v_ashr_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v5, v0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v6, v1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v7, v2
; GFX10-LABEL: v_ashr_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_ashrrev_i32_e32 v0, v16, v0
; GFX10-NEXT: v_ashrrev_i32_e32 v1, v17, v1
; GFX11-LABEL: v_ashr_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_ashrrev_i32_e32 v0, v16, v0
; GFX11-NEXT: v_ashrrev_i32_e32 v1, v17, v1
; GFX10PLUS-LABEL: v_ashr_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr i16 %value, %amount
; GFX10PLUS-LABEL: v_ashr_i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 15, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr i16 %value, 15
; GFX10PLUS-LABEL: v_ashr_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr <2 x i16> %value, %amount
; GFX10PLUS-LABEL: v_ashr_v2i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr <2 x i16> %value, <i16 15, i16 15>
; GFX10PLUS-LABEL: v_ashr_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v2, v0
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_v8i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v4, v0
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, v5, v1
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v2, v6, v2
; GFX10PLUS-LABEL: v_ashr_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr i64 %value, %amount
; GFX10PLUS-LABEL: v_ashr_i64_63:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_i64_33:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 1, v1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_i64_32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = ashr i64 %value, 31
; GFX10PLUS-LABEL: v_ashr_v2i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ashr_v2i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_ashr_i65:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX11-LABEL: v_ashr_i65:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX10PLUS-LABEL: v_ashr_i65_33:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1
; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3
; WAVE32-LABEL: br_false:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB0_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: s_mov_b32 s4, 1
; WAVE32-LABEL: br_true:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB1_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: s_mov_b32 s4, 0
; WAVE32-LABEL: br_undef:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB2_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: ; implicit-def: $sgpr4
; WAVE32-LABEL: br_poison:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB3_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: ; implicit-def: $sgpr4
; GFX10-LABEL: v_bswap_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x10203
; GFX10-NEXT: s_setpc_b64 s[30:31]
%bswap = call i32 @llvm.bswap.i32(i32 %src)
; GFX10-LABEL: v_bswap_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x10203
; GFX10-NEXT: v_perm_b32 v1, 0, v1, 0x10203
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bswap_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v2, 0, v1, 0x10203
; GFX10-NEXT: v_perm_b32 v1, 0, v0, 0x10203
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: v_bswap_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v4, 0, v1, 0x10203
; GFX10-NEXT: v_perm_b32 v5, 0, v3, 0x10203
; GFX10-NEXT: v_perm_b32 v1, 0, v0, 0x10203
; GFX10-LABEL: v_bswap_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
; GFX10-NEXT: s_setpc_b64 s[30:31]
%bswap = call i16 @llvm.bswap.i16(i16 %src)
; GFX10-LABEL: v_bswap_i16_zext_to_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
; GFX10-NEXT: s_setpc_b64 s[30:31]
%bswap = call i16 @llvm.bswap.i16(i16 %src)
; GFX10-LABEL: v_bswap_i16_sext_to_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bswap_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x2030001
; GFX10-NEXT: s_setpc_b64 s[30:31]
%bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src)
; GFX10-LABEL: v_bswap_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v0, 0, v0, 0x2030001
; GFX10-NEXT: v_perm_b32 v1, 0, v1, 0x2030001
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bswap_i48:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_perm_b32 v1, 0, v1, 0x10203
; GFX10-NEXT: v_perm_b32 v2, 0, v0, 0x10203
; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
; GFX10-LABEL: value_finder_bug:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; GFX10-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%vec = load <4 x float>, ptr addrspace(4) %ptr, align 4
%vec.3 = extractelement <4 x float> %vec, i32 3
; GFX10-LABEL: test_fmed3_f32_known_nnan_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
; GFX10-LABEL: test_fmed3_f16_known_nnan_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul half %a, 2.0
; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e64 v0, 0x41200000, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
; GFX10-LABEL: test_fmed3_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
; GFX10-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 0x41200000, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
; GFX10-LABEL: test_min_max_K0Val_K1_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul double %a, 2.0
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul half %a, 2.0
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
; GFX10-LABEL: test_max_min_K1Val_K0_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul double %a, 2.0
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul half %a, 2.0
; GFX10-LABEL: test_max_K0min_K1Val_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 1.0)
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e32 v0, 1.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_min_f32_e32 v0, 0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 1.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-CONTRACT-LABEL: test_f32_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2
; GFX10-DENORM-LABEL: test_f32_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX10-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2
; GFX11-CONTRACT-LABEL: test_f32_add_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX11-DENORM-LABEL: test_f32_add_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2
; GFX10-DENORM-LABEL: test_f32_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX10-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2
; GFX11-CONTRACT-LABEL: test_f32_add_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX11-DENORM-LABEL: test_f32_add_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_fma_f32 v2, v2, v3, v4
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1
; GFX10-CONTRACT-LABEL: test_half_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2
; GFX10-DENORM-LABEL: test_half_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-CONTRACT-LABEL: test_half_add_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-DENORM-LABEL: test_half_add_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2
; GFX10-DENORM-LABEL: test_half_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-CONTRACT-LABEL: test_half_add_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f16 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-DENORM-LABEL: test_half_add_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-CONTRACT-LABEL: test_double_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-LABEL: test_double_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX11-CONTRACT-LABEL: test_double_add_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-DENORM-LABEL: test_double_add_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-LABEL: test_double_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX11-CONTRACT-LABEL: test_double_add_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-DENORM-LABEL: test_double_add_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-LABEL: test_v4f32_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX10-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX10-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX10-DENORM-LABEL: test_v4f32_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX10-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX10-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX11-CONTRACT-LABEL: test_v4f32_add_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX11-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX11-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX11-DENORM-LABEL: test_v4f32_add_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX11-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX11-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX10-CONTRACT-LABEL: test_v4f32_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX10-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX10-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX10-DENORM-LABEL: test_v4f32_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX10-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX10-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX11-CONTRACT-LABEL: test_v4f32_add_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX11-CONTRACT-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX11-CONTRACT-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX11-DENORM-LABEL: test_v4f32_add_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_fma_f32 v8, v8, v12, v16
; GFX11-DENORM-NEXT: v_fma_f32 v9, v9, v13, v17
; GFX11-DENORM-NEXT: v_fma_f32 v10, v10, v14, v18
; GFX10-CONTRACT-LABEL: test_f16_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-DENORM-LABEL: test_f16_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6
; GFX10-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX11-CONTRACT-LABEL: test_f16_add_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-LABEL: test_f16_add_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6
; GFX11-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7
; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-CONTRACT-LABEL: test_f16_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-DENORM-LABEL: test_f16_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6
; GFX10-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX11-CONTRACT-LABEL: test_f16_add_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v4, v4, v6, v8
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v5, v5, v7, v9
; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-LABEL: test_f16_add_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6
; GFX11-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7
; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-CONTRACT-LABEL: test_f64_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: s_clause 0x8
; GFX10-CONTRACT-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-CONTRACT-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX10-DENORM-LABEL: test_f64_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: s_clause 0x8
; GFX10-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-DENORM-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX11-CONTRACT-LABEL: test_f64_add_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: s_clause 0x8
; GFX11-CONTRACT-NEXT: scratch_load_b32 v31, off, s32
; GFX11-CONTRACT-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-DENORM-LABEL: test_f64_add_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: s_clause 0x8
; GFX11-DENORM-NEXT: scratch_load_b32 v31, off, s32
; GFX11-DENORM-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX10-CONTRACT-LABEL: test_f64_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: s_clause 0x8
; GFX10-CONTRACT-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-CONTRACT-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX10-DENORM-LABEL: test_f64_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: s_clause 0x8
; GFX10-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-DENORM-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX11-CONTRACT-LABEL: test_f64_add_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: s_clause 0x8
; GFX11-CONTRACT-NEXT: scratch_load_b32 v31, off, s32
; GFX11-CONTRACT-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-DENORM-LABEL: test_f64_add_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: s_clause 0x8
; GFX11-DENORM-NEXT: scratch_load_b32 v31, off, s32
; GFX11-DENORM-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX10-LABEL: test_f32_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_f32_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-UNSAFE-LABEL: test_f32_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
; GFX10-LABEL: test_f32_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: v_add_f32_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
; GFX10-LABEL: test_add_mul_multiple_defs_z:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-CONTRACT-LABEL: test_add_mul_multiple_defs_z:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0)
; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v0, v1
; GFX10-DENORM-LABEL: test_add_mul_multiple_defs_z:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
; GFX10-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1
; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0)
; GFX10-UNSAFE-NEXT: v_fmac_f32_e32 v3, v0, v1
; GFX10-LABEL: test_add_mul_rhs_multiple_defs_z:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0)
; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v0, v1
; GFX10-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
; GFX10-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1
; GFX10-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0)
; GFX10-UNSAFE-NEXT: v_fmac_f32_e32 v3, v0, v1
; GFX10-LABEL: test_half_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_half_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX10-UNSAFE-LABEL: test_half_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
; GFX10-LABEL: test_half_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: v_add_f16_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
; GFX10-LABEL: test_double_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_double_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX10-UNSAFE-LABEL: test_double_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
; GFX10-LABEL: test_double_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
; GFX10-LABEL: test_4xfloat_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
; GFX10-CONTRACT-LABEL: test_4xfloat_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10
; GFX10-DENORM-LABEL: test_4xfloat_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, v8
; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, v9
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10
; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8
; GFX10-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9
; GFX10-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10
; GFX10-LABEL: test_3xfloat_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v4
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v5
; GFX10-CONTRACT-LABEL: test_3xfloat_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8
; GFX10-DENORM-LABEL: test_3xfloat_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v3, v6
; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8
; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6
; GFX10-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7
; GFX10-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8
; GFX10-LABEL: test_4xhalf_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_pk_add_f16 v0, v0, v4
; GFX10-CONTRACT-LABEL: test_4xhalf_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-LABEL: test_4xhalf_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4
; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_3xhalf_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_pk_add_f16 v0, v4, v0
; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0
; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_4xdouble_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-CONTRACT-LABEL: test_4xdouble_add_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
; GFX10-DENORM-LABEL: test_4xdouble_add_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-UNSAFE-LABEL: test_4xdouble_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
; GFX10-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
; GFX10-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
; GFX10-LABEL: test_3xdouble_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11]
; GFX10-CONTRACT-LABEL: test_3xdouble_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
; GFX10-DENORM-LABEL: test_3xdouble_add_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11]
; GFX10-UNSAFE-LABEL: test_3xdouble_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
; GFX10-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
; GFX10-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
; GFX10-LABEL: test_f32_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_f32_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f32_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f32_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-LABEL: test_f32_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f32_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_f32_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f32_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f32_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v2, v0
; GFX10-LABEL: test_half_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_half_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX11-CONTRACT-LABEL: test_half_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_half_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-LABEL: test_half_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX11-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_half_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX10-LABEL: test_double_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_double_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX11-CONTRACT-LABEL: test_double_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_double_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-LABEL: test_double_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_double_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX11-CONTRACT-LABEL: test_double_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_double_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX10-LABEL: test_v4f32_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
; GFX10-DENORM-LABEL: test_v4f32_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8
; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10
; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
; GFX11-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
; GFX11-DENORM-LABEL: test_v4f32_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX10-LABEL: test_v4f32_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
; GFX10-DENORM-LABEL: test_v4f32_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8
; GFX10-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9
; GFX10-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10
; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
; GFX11-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
; GFX11-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
; GFX11-DENORM-LABEL: test_v4f32_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX10-LABEL: test_v4f16_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-LABEL: test_v4f16_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX11-DENORM-LABEL: test_v4f16_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX10-LABEL: test_v4f16_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_sub_f16_e32 v2, v4, v0
; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-LABEL: test_v4f16_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX11-DENORM-LABEL: test_v4f16_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX10-LABEL: test_v4f64_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
; GFX10-DENORM-LABEL: test_v4f64_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
; GFX11-DENORM-LABEL: test_v4f64_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-LABEL: test_v4f64_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
; GFX10-DENORM-LABEL: test_v4f64_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
; GFX11-DENORM-LABEL: test_v4f64_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-LABEL: test_f32_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v1
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_f32_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v1, -v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: test_f16_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f16_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_f64_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-CONTRACT-LABEL: test_f64_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f64_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v4
; GFX10-NEXT: v_mul_f32_e64 v1, v1, -v5
; GFX10-NEXT: v_mul_f32_e64 v2, v2, -v6
; GFX10-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v4, -v8
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, -v5, -v9
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, -v6, -v10
; GFX10-DENORM-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v4, -v8
; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, -v5, -v9
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, -v6, -v10
; GFX10-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX10-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX10-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21]
; GFX10-DENORM-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_mov_b32_e32 v1, s4
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_add_u32 s0, s32, s0
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: scratch_store_b32 off, v0, s0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%n = load i32, ptr addrspace(4) @gv, align 4
%alloca = alloca i32, i32 %n, addrspace(5)
; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_mov_b32_e32 v1, s4
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_add_u32 s0, s32, s0
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: scratch_store_b32 off, v0, s0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%n = load i32, ptr addrspace(4) @gv, align 16
%alloca = alloca i32, i32 %n, addrspace(5)
; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
; GFX10-NEXT: s_addk_i32 s32, 0x800
; GFX10-NEXT: s_and_b32 s4, s4, 0xfffffc00
; GFX10-NEXT: v_mov_b32_e32 v1, s4
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_add_i32 s33, s32, 31
; GFX11-NEXT: s_add_i32 s32, s32, 64
; GFX11-NEXT: s_addk_i32 s32, 0xffc0
; GFX11-NEXT: s_and_b32 s0, s0, 0xfffffc00
; GFX11-NEXT: scratch_store_b32 off, v0, s0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%n = load i32, ptr addrspace(4) @gv
%alloca = alloca i32, i32 %n, align 32, addrspace(5)
; GFX10-LABEL: extractelement_vgpr_v4i128_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX11-LABEL: extractelement_vgpr_v4i128_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: global_load_b128 v[16:19], v[0:1], off
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
; GFX10-LABEL: extractelement_vgpr_v4i128_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v4i128_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v4i128_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v4i128_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v4i128_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v4i128_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v4i128_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v4i128_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v2
; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
; GFX11-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 1, v2
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
; GFX10-LABEL: extractelement_vgpr_v4i16_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v4i16_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v4i16_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-LABEL: extractelement_vgpr_v4i16_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-LABEL: extractelement_vgpr_v4i16_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX11-LABEL: extractelement_vgpr_v4i16_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX10-LABEL: extractelement_vgpr_v4i16_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX11-LABEL: extractelement_vgpr_v4i16_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX10-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v2
; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
; GFX11-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v2
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
; GFX10-LABEL: extractelement_vgpr_v8i16_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v8i16_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v8i16_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-LABEL: extractelement_vgpr_v8i16_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-LABEL: extractelement_vgpr_v8i16_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX11-LABEL: extractelement_vgpr_v8i16_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX10-LABEL: extractelement_vgpr_v8i16_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX11-LABEL: extractelement_vgpr_v8i16_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX10-LABEL: extractelement_vgpr_v8i16_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX11-LABEL: extractelement_vgpr_v8i16_idx4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: extractelement_vgpr_v8i16_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; GFX11-LABEL: extractelement_vgpr_v8i16_idx5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; GFX10-LABEL: extractelement_vgpr_v8i16_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v3
; GFX11-LABEL: extractelement_vgpr_v8i16_idx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v3
; GFX10-LABEL: extractelement_vgpr_v8i16_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v3
; GFX11-LABEL: extractelement_vgpr_v8i16_idx7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3
; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: v_and_b32_e32 v1, 3, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: v_and_b32_e32 v1, 3, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX11-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 2, v2
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 2, v2
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX11-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX11-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX11-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1
; GFX11-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1
; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX11-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX11-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 2, v2
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[3:6], v[0:1], off
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 2, v2
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX11-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX11-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX11-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1
; GFX11-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1
; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX11-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX11-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1
; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX11-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v2
; GFX11-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v2
; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; GFX11-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v2
; GFX11-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2
; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v3
; GFX11-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v3
; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v3
; GFX11-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v3
; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v3
; GFX11-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3
; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v3
; GFX11-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v3
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[6:7], 2
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, s6
; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[2:3], 2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
; GFX10-LABEL: dyn_extract_v8i64_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-LABEL: dyn_extract_v8i64_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_u32_e32 v8, 3, v8
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10-LABEL: dyn_extract_v8p1_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-LABEL: dyn_extract_v8p1_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6
; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7
; GFX10-LABEL: dyn_extract_v6f64_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-LABEL: dyn_extract_v6f64_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12
; GFX10-LABEL: dyn_extract_v7f64_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-LABEL: dyn_extract_v7f64_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14
; GFX10-LABEL: dyn_extract_v15f32_const_s_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX11-LABEL: dyn_extract_v15f32_const_s_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-LABEL: dyn_extract_v15f32_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
; GFX11-LABEL: dyn_extract_v15f32_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15
; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-LABEL: v_extract_v64i32_7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v7
; GFX11-LABEL: v_extract_v64i32_7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v7
; GFX10-LABEL: v_extract_v64i32_32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_extract_v64i32_32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_extract_v64i32_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX11-LABEL: v_extract_v64i32_33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX10-LABEL: v_extract_v64i32_37:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v5
; GFX11-LABEL: v_extract_v64i32_37:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:144
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v5
; GFX10-LABEL: v_fdiv_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX10-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-LABEL: v_fdiv_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX11-NEXT: v_rcp_f32_e32 v2, v2
; GFX10-LABEL: v_fdiv_f16_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f16_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-LABEL: v_fdiv_f16_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX10-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-LABEL: v_fdiv_f16_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX11-NEXT: v_rcp_f32_e32 v2, v2
; GFX10-LABEL: v_rcp_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, 1.0
; GFX10-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-LABEL: v_rcp_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, 1.0
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX10-LABEL: v_rcp_f16_arcp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, 1.0
; GFX10-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-LABEL: v_rcp_f16_arcp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, 1.0
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX10PLUS-LABEL: v_rcp_f16_arcp_afn:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp afn half 1.0, %x
; GFX10PLUS-LABEL: v_rcp_f16_ulp25:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rcp_f16_e32 v0, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv half 1.0, %x, !fpmath !0
; GFX10-LABEL: v_fdiv_f16_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f16_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-LABEL: v_fdiv_f16_arcp_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX10-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-LABEL: v_fdiv_f16_arcp_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX11-NEXT: v_rcp_f32_e32 v2, v2
; GFX10-LABEL: v_fdiv_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-LABEL: v_fdiv_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX10-LABEL: v_fdiv_v2f16_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f16_e32 v2, v1
; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2
; GFX11-LABEL: v_fdiv_v2f16_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-LABEL: v_fdiv_v2f16_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-LABEL: v_fdiv_v2f16_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX10-LABEL: v_rcp_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v4, 1.0
; GFX11-LABEL: v_rcp_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0
; GFX10-LABEL: v_rcp_v2f16_arcp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v4, 1.0
; GFX11-LABEL: v_rcp_v2f16_arcp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v4, 1.0
; GFX10-LABEL: v_rcp_v2f16_arcp_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f16_e32 v1, v0
; GFX10-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-LABEL: v_rcp_v2f16_arcp_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_rcp_f16_e32 v0, v0
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-LABEL: v_rcp_v2f16_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f16_e32 v1, v0
; GFX10-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-LABEL: v_rcp_v2f16_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_rcp_f16_e32 v0, v0
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-LABEL: v_fdiv_v2f16_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f16_e32 v2, v1
; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2
; GFX11-LABEL: v_fdiv_v2f16_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-LABEL: v_fdiv_v2f16_arcp_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-LABEL: v_fdiv_v2f16_arcp_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX10-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f16_e32 v2, v1
; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2
; GFX11-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT: v_rcp_f16_e32 v1, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-IEEE-LABEL: v_fdiv_f32:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0
; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0
; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2
; GFX10-FLUSH-LABEL: v_fdiv_f32:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
; GFX11-IEEE-LABEL: v_fdiv_f32:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
; GFX11-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0
; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-FLUSH-LABEL: v_fdiv_f32:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fdiv_f32_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f32_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-IEEE-LABEL: v_fdiv_f32_ulp25:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0
; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0
; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2
; GFX10-FLUSH-LABEL: v_fdiv_f32_ulp25:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
; GFX10-FLUSH-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-IEEE-LABEL: v_fdiv_f32_ulp25:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
; GFX11-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0
; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-FLUSH-LABEL: v_fdiv_f32_ulp25:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
; GFX10-IEEE-LABEL: v_rcp_f32:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0
; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1
; GFX10-FLUSH-LABEL: v_rcp_f32:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1
; GFX11-IEEE-LABEL: v_rcp_f32:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-FLUSH-LABEL: v_rcp_f32:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX10-IEEE-LABEL: v_rcp_f32_arcp:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0
; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1
; GFX10-FLUSH-LABEL: v_rcp_f32_arcp:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1
; GFX11-IEEE-LABEL: v_rcp_f32_arcp:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-FLUSH-LABEL: v_rcp_f32_arcp:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_rcp_f32_arcp_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_rcp_f32_arcp_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv arcp afn float 1.0, %x
; GFX10-IEEE-LABEL: v_rcp_f32_ulp25:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v0|
; GFX10-IEEE-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s4
; GFX10-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-FLUSH-LABEL: v_rcp_f32_ulp25:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v0, v0
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-IEEE-LABEL: v_rcp_f32_ulp25:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v0|
; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-IEEE-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s0
; GFX11-FLUSH-LABEL: v_rcp_f32_ulp25:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-FLUSH-NEXT: s_setpc_b64 s[30:31]
%fdiv = fdiv float 1.0, %x, !fpmath !0
; GFX10-LABEL: v_fdiv_f32_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fdiv_f32_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-IEEE-LABEL: v_fdiv_f32_arcp_ulp25:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0
; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0
; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2
; GFX10-FLUSH-LABEL: v_fdiv_f32_arcp_ulp25:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
; GFX10-FLUSH-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-IEEE-LABEL: v_fdiv_f32_arcp_ulp25:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
; GFX11-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0
; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-FLUSH-LABEL: v_fdiv_f32_arcp_ulp25:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
; GFX10-IEEE-LABEL: v_fdiv_v2f32:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0
; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1
; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0
; GFX10-FLUSH-LABEL: v_fdiv_v2f32:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, vcc_lo, v0, v2, v0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v5, v4
; GFX11-IEEE-LABEL: v_fdiv_v2f32:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0
; GFX11-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1
; GFX11-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0
; GFX11-FLUSH-LABEL: v_fdiv_v2f32:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, null, v2, v2, v0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v6, vcc_lo, v0, v2, v0
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fdiv_v2f32_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f32_e32 v2, v2
; GFX10-NEXT: v_rcp_f32_e32 v3, v3
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX11-LABEL: v_fdiv_v2f32_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-NEXT: v_rcp_f32_e32 v3, v3
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-IEEE-LABEL: v_fdiv_v2f32_ulp25:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0
; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1
; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0
; GFX10-FLUSH-LABEL: v_fdiv_v2f32_ulp25:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v2|
; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s4
; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v3|
; GFX11-IEEE-LABEL: v_fdiv_v2f32_ulp25:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0
; GFX11-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1
; GFX11-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0
; GFX11-FLUSH-LABEL: v_fdiv_v2f32_ulp25:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v2|
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s0
; GFX10-IEEE-LABEL: v_rcp_v2f32:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0
; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0
; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0
; GFX10-FLUSH-LABEL: v_rcp_v2f32:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
; GFX11-IEEE-LABEL: v_rcp_v2f32:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0
; GFX11-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0
; GFX11-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0
; GFX11-FLUSH-LABEL: v_rcp_v2f32:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX10-IEEE-LABEL: v_rcp_v2f32_arcp:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0
; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0
; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0
; GFX10-FLUSH-LABEL: v_rcp_v2f32_arcp:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0
; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
; GFX11-IEEE-LABEL: v_rcp_v2f32_arcp:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0
; GFX11-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0
; GFX11-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0
; GFX11-FLUSH-LABEL: v_rcp_v2f32_arcp:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0
; GFX11-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_rcp_v2f32_arcp_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f32_e32 v0, v0
; GFX10-NEXT: v_rcp_f32_e32 v1, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_rcp_v2f32_arcp_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-IEEE-LABEL: v_rcp_v2f32_ulp25:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v0|
; GFX10-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
; GFX10-IEEE-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
; GFX10-FLUSH-LABEL: v_rcp_v2f32_ulp25:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v0, v0
; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v1, v1
; GFX10-FLUSH-NEXT: s_setpc_b64 s[30:31]
; GFX11-IEEE-LABEL: v_rcp_v2f32_ulp25:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v0|
; GFX11-IEEE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
; GFX11-FLUSH-LABEL: v_rcp_v2f32_ulp25:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-FLUSH-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-FLUSH-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fdiv_v2f32_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f32_e32 v2, v2
; GFX10-NEXT: v_rcp_f32_e32 v3, v3
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX11-LABEL: v_fdiv_v2f32_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-NEXT: v_rcp_f32_e32 v3, v3
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-IEEE-LABEL: v_fdiv_v2f32_arcp_ulp25:
; GFX10-IEEE: ; %bb.0:
; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0
; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1
; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0
; GFX10-FLUSH-LABEL: v_fdiv_v2f32_arcp_ulp25:
; GFX10-FLUSH: ; %bb.0:
; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v2|
; GFX10-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s4
; GFX10-FLUSH-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v3|
; GFX11-IEEE-LABEL: v_fdiv_v2f32_arcp_ulp25:
; GFX11-IEEE: ; %bb.0:
; GFX11-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-IEEE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0
; GFX11-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1
; GFX11-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0
; GFX11-FLUSH-LABEL: v_fdiv_v2f32_arcp_ulp25:
; GFX11-FLUSH: ; %bb.0:
; GFX11-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLUSH-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v2|
; GFX11-FLUSH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FLUSH-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x2f800000, s0
; GFX10-LABEL: v_fdiv_v2f32_arcp_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f32_e32 v2, v2
; GFX10-NEXT: v_rcp_f32_e32 v3, v3
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX11-LABEL: v_fdiv_v2f32_arcp_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-NEXT: v_rcp_f32_e32 v3, v3
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-LABEL: v_fdiv_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
; GFX11-LABEL: v_fdiv_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1]
; GFX11-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fdiv_f64_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
; GFX11-LABEL: v_fdiv_f64_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX10-LABEL: v_fdiv_f64_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
; GFX11-LABEL: v_fdiv_f64_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1]
; GFX11-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_rcp_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX11-LABEL: v_rcp_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0
; GFX11-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_rcp_f64_arcp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX11-LABEL: v_rcp_f64_arcp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0
; GFX11-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_rcp_f64_arcp_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
; GFX10-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; GFX11-LABEL: v_rcp_f64_arcp_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; GFX10-LABEL: v_rcp_f64_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX11-LABEL: v_rcp_f64_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0
; GFX11-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fdiv_f64_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
; GFX11-LABEL: v_fdiv_f64_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX10-LABEL: v_fdiv_f64_arcp_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
; GFX11-LABEL: v_fdiv_f64_arcp_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1]
; GFX11-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fdiv_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
; GFX11-LABEL: v_fdiv_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1]
; GFX11-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3]
; GFX11-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
; GFX10-LABEL: v_fdiv_v2f64_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GFX11-LABEL: v_fdiv_v2f64_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX11-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-LABEL: v_fdiv_v2f64_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
; GFX11-LABEL: v_fdiv_v2f64_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1]
; GFX11-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3]
; GFX11-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
; GFX10-LABEL: v_rcp_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
; GFX11-LABEL: v_rcp_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0
; GFX11-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0
; GFX11-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
; GFX10-LABEL: v_rcp_v2f64_arcp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
; GFX11-LABEL: v_rcp_v2f64_arcp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0
; GFX11-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0
; GFX11-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
; GFX10-LABEL: v_rcp_v2f64_arcp_afn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
; GFX10-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
; GFX11-LABEL: v_rcp_v2f64_arcp_afn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
; GFX11-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-LABEL: v_rcp_v2f64_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
; GFX11-LABEL: v_rcp_v2f64_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0
; GFX11-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0
; GFX11-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
; GFX10-LABEL: v_fdiv_v2f64_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GFX11-LABEL: v_fdiv_v2f64_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX11-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-LABEL: v_fdiv_v2f64_arcp_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
; GFX11-LABEL: v_fdiv_v2f64_arcp_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1]
; GFX11-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3]
; GFX11-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
; GFX10-LABEL: v_fdiv_v2f64_arcp_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GFX11-LABEL: v_fdiv_v2f64_arcp_afn_ulp25:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX11-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-LABEL: store_load_vindex_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v2, 15
; GFX11-LABEL: store_load_vindex_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX10-LABEL: private_ptr_foo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000
; GFX10-NEXT: scratch_store_dword v0, v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: private_ptr_foo:
; GFX11-LABEL: private_ptr_foo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0x41200000 :: v_dual_add_nc_u32 v0, 4, v0
; GFX11-NEXT: scratch_store_b32 v0, v1, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1
store float 1.000000e+01, ptr addrspace(5) %gep, align 4
; GFX10-LABEL: store_load_vindex_small_offset_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_add_i32 s0, s32, 0x100
; GFX11-LABEL: store_load_vindex_small_offset_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
; GFX11-NEXT: s_add_i32 s0, s32, 0x100
; GFX10-LABEL: store_load_vindex_large_offset_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_add_i32 s0, s32, 0x4004
; GFX11-LABEL: store_load_vindex_large_offset_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
; GFX11-NEXT: s_add_i32 s0, s32, 0x4004
; GFX10-LABEL: store_load_large_imm_offset_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
; GFX11-LABEL: store_load_large_imm_offset_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3e80
; GFX11-NEXT: v_mov_b32_e32 v2, 15
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
; GFX10-LABEL: store_load_i64_aligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off
; GFX11-LABEL: store_load_i64_aligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v1, 15
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc
; GFX10-LABEL: store_load_i64_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off
; GFX11-LABEL: store_load_i64_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v1, 15
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc
; GFX10-LABEL: store_load_v3i32_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s2, 3
; GFX10-NEXT: s_mov_b32 s1, 2
; GFX10-NEXT: s_mov_b32 s0, 1
; GFX11-LABEL: store_load_v3i32_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, 3
; GFX11-NEXT: s_mov_b32 s1, 2
; GFX11-NEXT: s_mov_b32 s0, 1
; GFX10-LABEL: store_load_v4i32_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s3, 4
; GFX10-NEXT: s_mov_b32 s2, 3
; GFX10-NEXT: s_mov_b32 s1, 2
; GFX11-LABEL: store_load_v4i32_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 4
; GFX11-NEXT: s_mov_b32 s2, 3
; GFX11-NEXT: s_mov_b32 s1, 2
; GFX10-LABEL: v_fma_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
; GFX10-LABEL: v_fma_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fma_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX11-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fma_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %x, half %y, half %z)
; GFX10-LABEL: v_fma_f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg half %x
; GFX10-LABEL: v_fma_f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.y = fneg half %y
; GFX10-LABEL: v_fma_f16_fneg_add:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16_fneg_add:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg half %z
; GFX10-LABEL: v_fma_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z)
; GFX10-LABEL: v_fma_v2f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%x.fneg = fneg <2 x half> %x
; GFX10-LABEL: v_fma_v2f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%y.fneg = fneg <2 x half> %y
; GFX10-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%x.fneg = fneg <2 x half> %x
; GFX10-LABEL: v_fma_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fma_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fma_v4f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fma_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fma_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call double @llvm.fma.f64(double %x, double %y, double %z)
; GFX10-LABEL: v_fma_f64_fneg_all:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f64_fneg_all:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg double %x
; GFX10-LABEL: v_fma_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fma_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX11-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fma_f32_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fabs_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10-LABEL: v_fma_f32_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
; GFX10-LABEL: v_fma_f32_fabs_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fabs_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10-LABEL: v_fma_f32_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
; GFX10-LABEL: v_fma_f32_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, v0, -v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.y = fneg float %y
; GFX10-LABEL: v_fma_f32_fneg_z:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fneg_z:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg float %z
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_K0Val_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_K1Val_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_min_max_non_inline_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 0x41000000, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_min_max_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_min_max_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-LABEL: v_fmul_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %a, %b
; GFX10-LABEL: v_fmul_v2f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10-LABEL: v_fmul_v2f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <2 x half> %b
; GFX10-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10-LABEL: v_fmul_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v3f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v3f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v4f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v4f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v4f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fmul_v6f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5
; GFX10-LABEL: v_fmul_v6f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-LABEL: v_fmul_v6f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v4
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v5
; GFX10-LABEL: v_fmul_v8f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6
; GFX10-LABEL: v_fmul_v8f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-LABEL: v_fmul_v8f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v4
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v5
; GFX10-NEXT: v_pk_mul_f16 v2, v2, v6
; GFX10-LABEL: v_pow_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX11-LABEL: v_pow_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX10-LABEL: v_pow_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, v1
; GFX10-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, vcc_lo
; GFX11-LABEL: v_pow_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, v1
; GFX11-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, vcc_lo
; GFX10-LABEL: v_pow_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f16_e32 v0, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: v_pow_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-LABEL: v_pow_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f16_e32 v2, v0
; GFX10-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX11-LABEL: v_pow_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f16_e32 v2, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX11-LABEL: v_pow_v2f16_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f16_e32 v2, v0
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX10-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX11-LABEL: v_pow_v2f16_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f16_e32 v2, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX10-NEXT: v_log_f16_e32 v2, v0
; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v2
; GFX11-LABEL: v_pow_f32_fabs_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX11-LABEL: v_pow_f32_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v2
; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX10-LABEL: v_pow_f32_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, -v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
; GFX10-NEXT: v_mul_f32_e64 v0, -v0, v2
; GFX11-LABEL: v_pow_f32_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX10-LABEL: v_pow_f32_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX11-LABEL: v_pow_f32_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX10-LABEL: v_fshl_i7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
; GFX11-LABEL: v_fshl_i7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1
; GFX10-LABEL: v_fshl_i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v3, v2
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
; GFX11-LABEL: v_fshl_i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v3, v2
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10-LABEL: v_fshl_i8_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
; GFX11-LABEL: v_fshl_i8_4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fshl_i8_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 3, v1
; GFX11-LABEL: v_fshl_i8_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fshl_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0
; GFX11-LABEL: v_fshl_v2i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v0
; GFX10-LABEL: v_fshl_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2
; GFX10-NEXT: v_and_b32_e32 v10, 7, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX11-LABEL: v_fshl_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX10-LABEL: v_fshl_i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX10-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX11-LABEL: v_fshl_i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX11-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX10-LABEL: v_fshl_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX11-LABEL: v_fshl_v2i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
}
define i32 @v_fshl_i32(i32 %lhs, i32 %rhs, i32 %amt) {
-; GFX6-LABEL: v_fshl_i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v1, v0, v1, 1
-; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_not_b32_e32 v2, v2
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshl_i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v1, v0, v1, 1
-; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_not_b32_e32 v2, v2
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshl_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v1, v0, v1, 1
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_not_b32_e32 v2, v2
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshl_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v1, v0, v1, 1
-; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_not_b32_e32 v2, v2
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshl_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v1, v0, v1, 1
+; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; GCN-NEXT: v_not_b32_e32 v2, v2
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshl_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v1, v0, v1, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; GFX11-NEXT: v_not_b32_e32 v2, v2
}
define i32 @v_fshl_i32_5(i32 %lhs, i32 %rhs) {
-; GFX6-LABEL: v_fshl_i32_5:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, -5
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshl_i32_5:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, -5
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshl_i32_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, -5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshl_i32_5:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, -5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshl_i32_5:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -5
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshl_i32_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -5
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
}
define i32 @v_fshl_i32_8(i32 %lhs, i32 %rhs) {
-; GFX6-LABEL: v_fshl_i32_8:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, -8
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshl_i32_8:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, -8
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshl_i32_8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, -8
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshl_i32_8:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, -8
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshl_i32_8:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -8
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshl_i32_8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -8
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
; GFX10-LABEL: v_fshl_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v2, v0, v2, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; GFX10-NEXT: v_not_b32_e32 v4, v4
; GFX11-LABEL: v_fshl_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v2, v0, v2, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; GFX11-NEXT: v_not_b32_e32 v4, v4
; GFX10-LABEL: v_fshl_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v3, v0, v3, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; GFX10-NEXT: v_not_b32_e32 v6, v6
; GFX11-LABEL: v_fshl_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v3, v0, v3, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; GFX11-NEXT: v_not_b32_e32 v6, v6
; GFX10-LABEL: v_fshl_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v4, v0, v4, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; GFX10-NEXT: v_not_b32_e32 v8, v8
; GFX11-LABEL: v_fshl_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v4, v0, v4, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; GFX11-NEXT: v_not_b32_e32 v8, v8
; GFX10-LABEL: v_fshl_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
; GFX11-LABEL: v_fshl_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1
; GFX10-LABEL: v_fshl_i16_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 12, v1
; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-LABEL: v_fshl_i16_4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0
; GFX11-NEXT: v_lshrrev_b16 v1, 12, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshl_i16_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 11, v1
; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-LABEL: v_fshl_i16_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0
; GFX11-NEXT: v_lshrrev_b16 v1, 11, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshl_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2
; GFX10-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
; GFX11-LABEL: v_fshl_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2
; GFX11-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
; GFX10-LABEL: v_fshl_v2i16_4_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0
; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1
; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-LABEL: v_fshl_v2i16_4_8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0
; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshl_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX11-LABEL: v_fshl_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX10-LABEL: v_fshl_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX11-LABEL: v_fshl_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX10-LABEL: v_fshl_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v5, v4
; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
; GFX11-LABEL: v_fshl_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v5, v4
; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_and_b32_e32 v4, 63, v4
; GFX10-LABEL: v_fshl_i64_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 27, v3
; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
; GFX11-LABEL: v_fshl_i64_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 27, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
}
define i64 @v_fshl_i64_32(i64 %lhs, i64 %rhs) {
-; GFX6-LABEL: v_fshl_i64_32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v1, v0
-; GFX6-NEXT: v_mov_b32_e32 v0, v3
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshl_i64_32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v1, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, v3
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshl_i64_32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshl_i64_32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mov_b32_e32 v1, v0
-; GFX10-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshl_i64_32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, v0
+; GCN-NEXT: v_mov_b32_e32 v0, v3
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshl_i64_32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32)
; GFX10-LABEL: v_fshl_i64_48:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v1
; GFX11-LABEL: v_fshl_i64_48:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v4, v0
; GFX11-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshl_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v9, v8
; GFX10-NEXT: v_not_b32_e32 v11, v10
; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX11-LABEL: v_fshl_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v9, v8
; GFX11-NEXT: v_not_b32_e32 v11, v10
; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX10-LABEL: v_fshl_i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8
; GFX10-NEXT: v_not_b32_e32 v8, v8
; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX11-LABEL: v_fshl_i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8
; GFX11-NEXT: v_not_b32_e32 v8, v8
; GFX10-LABEL: v_fshl_i128_65:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5
; GFX11-LABEL: v_fshl_i128_65:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v5
; GFX10-LABEL: v_fshl_v2i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16
; GFX10-NEXT: v_not_b32_e32 v16, v16
; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
; GFX11-LABEL: v_fshl_v2i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16
; GFX11-NEXT: v_not_b32_e32 v16, v16
; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
; GFX10-LABEL: v_fshr_i7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX11-LABEL: v_fshr_i7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX10-LABEL: v_fshr_i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v3, v2
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX11-LABEL: v_fshr_i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v3, v2
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX10-LABEL: v_fshr_i8_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
; GFX11-LABEL: v_fshr_i8_4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fshr_i8_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1
; GFX11-LABEL: v_fshr_i8_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_lshlrev_b16 v0, 3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fshr_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX11-LABEL: v_fshr_v2i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX10-LABEL: v_fshr_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX10-NEXT: v_not_b32_e32 v8, v2
; GFX11-LABEL: v_fshr_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 8, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX10-LABEL: v_fshr_i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX11-LABEL: v_fshr_i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-LABEL: v_fshr_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX11-LABEL: v_fshr_v2i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
}
define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
-; GFX6-LABEL: v_fshr_i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshr_i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshr_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshr_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshr_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
}
define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
-; GFX6-LABEL: v_fshr_i32_5:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 5
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshr_i32_5:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 5
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshr_i32_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshr_i32_5:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshr_i32_5:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 5
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_i32_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 5
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
}
define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
-; GFX6-LABEL: v_fshr_i32_8:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 8
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshr_i32_8:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 8
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshr_i32_8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 8
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshr_i32_8:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 8
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshr_i32_8:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 8
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_i32_8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 8
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
}
define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
-; GFX6-LABEL: v_fshr_v2i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4
-; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v5
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshr_v2i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4
-; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v5
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshr_v2i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
-; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshr_v2i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4
-; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshr_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v2, v4
+; GCN-NEXT: v_alignbit_b32 v1, v1, v3, v5
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
}
define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
-; GFX6-LABEL: v_fshr_v3i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6
-; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v7
-; GFX6-NEXT: v_alignbit_b32 v2, v2, v5, v8
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshr_v3i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6
-; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v7
-; GFX8-NEXT: v_alignbit_b32 v2, v2, v5, v8
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshr_v3i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6
-; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v7
-; GFX9-NEXT: v_alignbit_b32 v2, v2, v5, v8
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshr_v3i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6
-; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7
-; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshr_v3i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v3, v6
+; GCN-NEXT: v_alignbit_b32 v1, v1, v4, v7
+; GCN-NEXT: v_alignbit_b32 v2, v2, v5, v8
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7
; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8
}
define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
-; GFX6-LABEL: v_fshr_v4i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8
-; GFX6-NEXT: v_alignbit_b32 v1, v1, v5, v9
-; GFX6-NEXT: v_alignbit_b32 v2, v2, v6, v10
-; GFX6-NEXT: v_alignbit_b32 v3, v3, v7, v11
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshr_v4i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8
-; GFX8-NEXT: v_alignbit_b32 v1, v1, v5, v9
-; GFX8-NEXT: v_alignbit_b32 v2, v2, v6, v10
-; GFX8-NEXT: v_alignbit_b32 v3, v3, v7, v11
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshr_v4i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8
-; GFX9-NEXT: v_alignbit_b32 v1, v1, v5, v9
-; GFX9-NEXT: v_alignbit_b32 v2, v2, v6, v10
-; GFX9-NEXT: v_alignbit_b32 v3, v3, v7, v11
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshr_v4i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8
-; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9
-; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10
-; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshr_v4i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_alignbit_b32 v0, v0, v4, v8
+; GCN-NEXT: v_alignbit_b32 v1, v1, v5, v9
+; GCN-NEXT: v_alignbit_b32 v2, v2, v6, v10
+; GCN-NEXT: v_alignbit_b32 v3, v3, v7, v11
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9
; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10
; GFX10-LABEL: v_fshr_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
; GFX11-LABEL: v_fshr_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
; GFX10-LABEL: v_fshr_i16_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-LABEL: v_fshr_i16_4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b16 v0, 12, v0
; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshr_i16_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0
; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1
; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-LABEL: v_fshr_i16_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b16 v0, 11, v0
; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshr_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2
; GFX11-LABEL: v_fshr_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2
; GFX10-LABEL: v_fshr_v2i16_4_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0
; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1
; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-LABEL: v_fshr_v2i16_4_8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0
; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshr_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX11-LABEL: v_fshr_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX10-LABEL: v_fshr_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX11-LABEL: v_fshr_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
; GFX10-LABEL: v_fshr_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v5, v4
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
; GFX11-LABEL: v_fshr_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v5, v4
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: v_and_b32_e32 v4, 63, v4
; GFX10-LABEL: v_fshr_i64_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
; GFX10-NEXT: v_lshl_or_b32 v1, v4, 27, v1
; GFX11-LABEL: v_fshr_i64_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v4, v0
; GFX11-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
}
define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) {
-; GFX6-LABEL: v_fshr_i64_32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v1, v0
-; GFX6-NEXT: v_mov_b32_e32 v0, v3
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_fshr_i64_32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v1, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, v3
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_fshr_i64_32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_fshr_i64_32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mov_b32_e32 v1, v0
-; GFX10-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_fshr_i64_32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, v0
+; GCN-NEXT: v_mov_b32_e32 v0, v3
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_i64_32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
; GFX10-LABEL: v_fshr_i64_48:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fshr_i64_48:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: v_fshr_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v9, v8
; GFX10-NEXT: v_not_b32_e32 v11, v10
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-LABEL: v_fshr_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v9, v8
; GFX11-NEXT: v_not_b32_e32 v11, v10
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX10-LABEL: v_fshr_i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v9, v8
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1
; GFX11-LABEL: v_fshr_i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v9, v8
; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX10-LABEL: v_fshr_i128_65:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v8, v2
; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
; GFX11-LABEL: v_fshr_i128_65:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v8, v2
; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
; GFX10-LABEL: v_fshr_v2i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_not_b32_e32 v17, v16
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_and_b32_e32 v26, 0x7f, v16
; GFX11-LABEL: v_fshr_v2i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_not_b32_e32 v17, v16
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
; CHECK-LABEL: waterfall_loop:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: s_xor_saveexec_b32 s4, -1
; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 exec_lo, s4
; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b32 exec_lo, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
%ret = tail call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %vgpr_srd, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
; GFX10-LABEL: dyn_insertelement_v8f32_const_s_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
; GFX11-LABEL: dyn_insertelement_v8f32_const_s_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
; GFX10-LABEL: dyn_insertelement_v8f64_const_s_v_v:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s18, 0
; GFX10-NEXT: s_mov_b64 s[4:5], 1.0
; GFX10-NEXT: s_mov_b32 s19, 0x40200000
; GFX11-LABEL: dyn_insertelement_v8f64_const_s_v_v:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s14, 0
; GFX11-NEXT: s_mov_b32 s15, 0x40200000
; GFX11-NEXT: s_mov_b64 s[0:1], 1.0
; GFX10_W32-LABEL: v_div_fmas_f32:
; GFX10_W32: ; %bb.0:
; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_W32-NEXT: v_and_b32_e32 v3, 1, v3
; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
; GFX10_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
; GFX10_W64-LABEL: v_div_fmas_f32:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_W64-NEXT: v_and_b32_e32 v3, 1, v3
; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; GFX10_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
; GFX11_W32-LABEL: v_div_fmas_f32:
; GFX11_W32: ; %bb.0:
; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11_W32-NEXT: v_and_b32_e32 v3, 1, v3
; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
; GFX11_W64-LABEL: v_div_fmas_f32:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11_W64-NEXT: v_and_b32_e32 v3, 1, v3
; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
; GFX10_W32-LABEL: v_div_fmas_f64:
; GFX10_W32: ; %bb.0:
; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_W32-NEXT: v_and_b32_e32 v6, 1, v6
; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10_W64-LABEL: v_div_fmas_f64:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_W64-NEXT: v_and_b32_e32 v6, 1, v6
; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11_W32-LABEL: v_div_fmas_f64:
; GFX11_W32: ; %bb.0:
; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11_W32-NEXT: v_and_b32_e32 v6, 1, v6
; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11_W64-LABEL: v_div_fmas_f64:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11_W64-NEXT: v_and_b32_e32 v6, 1, v6
; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10PLUS-LABEL: ds_fadd_f32_vv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fadd_f32_vv_offset:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fadd_f32_vv_nortn:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_add_f32 v0, v1
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fadd_f32_vv_offset_nortn:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_add_f32 v0, v1 offset:512
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fadd_f32_vv_volatile:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fmin_f32_vv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fmin_f32_vv_offset:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fmin_f32_vv_nortn:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_min_f32 v0, v1
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fmin_f32_vv_offset_nortn:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_min_f32 v0, v1 offset:512
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: ds_fmin_f32_vv_volatile:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0)
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_fdot2_clamp:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
; GFX101-LABEL: v_mul_legacy_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
; GFX101-LABEL: v_mul_legacy_undef0_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_undef0_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_undef0_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
; GFX101-LABEL: v_mul_legacy_undef1_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_undef1_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_undef1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
; GFX101-LABEL: v_mul_legacy_undef_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_undef_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_undef_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, s0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float undef)
; GFX101-LABEL: v_mul_legacy_fabs_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_fabs_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_fabs_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v0|, |v1|
; GFX11-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
; GFX101-LABEL: v_mul_legacy_fneg_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_fneg_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_fneg_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, -v0, -v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg float %a
; GFX101-LABEL: v_add_mul_legacy_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX101-NEXT: v_add_f32_e32 v0, v0, v2
; GFX101-NEXT: s_setpc_b64 s[30:31]
; GFX103-LABEL: v_add_mul_legacy_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX103-NEXT: v_add_f32_e32 v0, v0, v2
; GFX103-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_add_mul_legacy_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v2
; GFX101-LABEL: v_mad_legacy_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mad_legacy_f32 v0, v0, v1, v2
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mad_legacy_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX103-NEXT: v_add_f32_e32 v0, v0, v2
; GFX103-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_mad_legacy_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v2
; GFX101-LABEL: v_mad_legacy_fneg_f32:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mad_legacy_fneg_f32:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
; GFX103-NEXT: v_add_f32_e32 v0, v0, v2
; GFX103-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_mad_legacy_fneg_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, -v0, -v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v2
; GFX101-LABEL: v_mul_legacy_f32_1.0:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_f32_1.0:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_f32_1.0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 1.0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float 1.0)
; GFX101-LABEL: v_mul_legacy_f32_1.0_swap:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_f32_1.0_swap:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_f32_1.0_swap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 1.0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float 1.0, float %b)
; GFX101-LABEL: v_mul_legacy_f32_2.0:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_f32_2.0:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_f32_2.0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 2.0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float 2.0)
; GFX101-LABEL: v_mul_legacy_f32_2.0_swap:
; GFX101: ; %bb.0:
; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
; GFX101-NEXT: s_setpc_b64 s[30:31]
;
; GFX103-LABEL: v_mul_legacy_f32_2.0_swap:
; GFX103: ; %bb.0:
; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
; GFX103-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_legacy_f32_2.0_swap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 2.0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float 2.0, float %b)
; GFX10-LABEL: global_atomic_csub:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_atomic_csub:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_atomic_csub_offset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000
; GFX10-NEXT: v_mov_b32_e32 v3, s4
; GFX10-NEXT: v_mov_b32_e32 v4, s5
; GFX11-LABEL: global_atomic_csub_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
; GFX10-LABEL: global_atomic_csub_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_atomic_csub_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_atomic_csub_offset_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000
; GFX10-NEXT: v_mov_b32_e32 v3, s4
; GFX10-NEXT: v_mov_b32_e32 v4, s5
; GFX11-LABEL: global_atomic_csub_offset_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2d_v4f32_xyzw_tfe:
; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v7, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v7, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v9, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v9, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v9, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v9, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v7, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v7, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v8, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v8, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v8, v4, s[10:11]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v8, v4, s[10:11]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue { <4 x float>, i32 } %v, 0
; GFX10-LABEL: test_setreg_roundingmode_var_vgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_readfirstlane_b32 s4, v0 ; encoding: [0x00,0x05,0x08,0x7e]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX11-LABEL: test_setreg_roundingmode_var_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; encoding: [0x00,0x05,0x00,0x7e]
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX10-LABEL: v_sdot2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 false)
; GFX10-LABEL: v_sdot2_clamp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 true)
; GFX10-LABEL: v_sdot2_inline_literal_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 4, i16 4>, <2 x i16> %b, i32 %c, i1 false)
; GFX10-LABEL: v_sdot2_inline_literal_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
; GFX10-LABEL: v_sdot2_inline_literal_a_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
; GFX10-LABEL: v_sdot2_inline_literal_a_b_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 8, i1 false)
; GFX10-LABEL: v_sdot2_inline_literal_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, 7
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 7, i1 false)
; GFX10-LABEL: v_sdot2_fneg_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10-LABEL: v_sdot2_fneg_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <2 x half> %b
; GFX10-LABEL: v_sdot2_fnegf32_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_sdot2_fnegv2f16_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_sdot2_shuffle10_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_sdot2_shuffle10_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_sdot4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_sdot4_clamp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 true)
; GFX10-LABEL: v_sdot4_cast_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v1
; GFX10-LABEL: v_sdot4_fnegf32_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: v_sdot4_fnegv2f16_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: v_sdot8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 false)
; GFX10-LABEL: v_sdot8_clamp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 true)
; GFX10-LABEL: v_sdot8_fnegf32_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_sdot8_fnegv2f16_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_us:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_su:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_us_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_su_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_us:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_su:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_us_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_su_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX10PLUS-LABEL: v_trig_preop_f64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
; GFX10PLUS-LABEL: v_trig_preop_f64_imm:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
; GFX10-LABEL: v_udot2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 false)
; GFX10-LABEL: v_udot2_clamp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 true)
; GFX10-LABEL: v_udot2_inline_literal_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 4, i16 4>, <2 x i16> %b, i32 %c, i1 false)
; GFX10-LABEL: v_udot2_inline_literal_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
; GFX10-LABEL: v_udot2_inline_literal_a_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 %c, i1 false)
; GFX10-LABEL: v_udot2_inline_literal_a_b_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 8, i16 8>, <2 x i16> <i16 4, i16 4>, i32 8, i1 false)
; GFX10-LABEL: v_udot2_inline_literal_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, 7
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 7, i1 false)
; GFX10-LABEL: v_udot2_fneg_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10-LABEL: v_udot2_fneg_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <2 x half> %b
; GFX10-LABEL: v_udot2_fnegf32_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_udot2_fnegv2f16_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_udot2_shuffle10_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_udot2_shuffle10_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_udot4:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 false)
; GFX10PLUS-LABEL: v_udot4_clamp:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 true)
; GFX10-LABEL: v_udot4_cast_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v1
; GFX11-LABEL: v_udot4_cast_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10PLUS-LABEL: v_udot4_fnegf32_a:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_udot4_fnegv2f16_a:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX10PLUS-NEXT: v_dot4_u32_u8 v0, v0, v1, v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_udot8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 false)
; GFX10PLUS-LABEL: v_udot8_clamp:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 true)
; GFX10PLUS-LABEL: v_udot8_fnegf32_a:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_udot8_fnegv2f16_a:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX10PLUS-NEXT: v_dot8_u32_u4 v0, v0, v1, v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_powi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-LABEL: v_powi_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX11-LABEL: v_powi_0_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 0)
; GFX11-LABEL: v_powi_1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 1)
ret float %res
; GFX11-LABEL: v_powi_neg1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_powi_2_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_powi_neg2_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_powi_4_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_powi_8_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_powi_16_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_powi_128_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_powi_neg128_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX10-LABEL: load_lds_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b128 v[0:3], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b128 v[0:3], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v4i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u8 v1, v0
; GFX10-NEXT: ds_read_u8 v2, v0 offset:1
; GFX10-NEXT: ds_read_u8 v3, v0 offset:2
; GFX11-LABEL: load_lds_v4i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u8 v1, v0
; GFX11-NEXT: ds_load_u8 v2, v0 offset:1
; GFX11-NEXT: ds_load_u8 v3, v0 offset:2
; GFX10-LABEL: load_lds_v4i32_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u16 v1, v0
; GFX10-NEXT: ds_read_u16 v2, v0 offset:2
; GFX10-NEXT: ds_read_u16 v3, v0 offset:4
; GFX11-LABEL: load_lds_v4i32_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16 v1, v0
; GFX11-NEXT: ds_load_u16 v2, v0 offset:2
; GFX11-NEXT: ds_load_u16 v3, v0 offset:4
; GFX10-LABEL: load_lds_v4i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX11-LABEL: load_lds_v4i32_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3
; GFX10-LABEL: load_lds_v4i32_align8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX11-LABEL: load_lds_v4i32_align8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_2addr_b64 v[0:3], v0 offset1:1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v4i32_align16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b128 v[0:3], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v4i32_align16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b128 v[0:3], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b96 v[0:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b96 v[0:2], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v3i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u8 v1, v0
; GFX10-NEXT: ds_read_u8 v2, v0 offset:1
; GFX10-NEXT: ds_read_u8 v3, v0 offset:2
; GFX11-LABEL: load_lds_v3i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u8 v1, v0
; GFX11-NEXT: ds_load_u8 v2, v0 offset:1
; GFX11-NEXT: ds_load_u8 v3, v0 offset:2
; GFX10-LABEL: load_lds_v3i32_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u16 v1, v0
; GFX10-NEXT: ds_read_u16 v2, v0 offset:2
; GFX10-NEXT: ds_read_u16 v3, v0 offset:4
; GFX11-LABEL: load_lds_v3i32_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16 v1, v0
; GFX11-NEXT: ds_load_u16 v2, v0 offset:2
; GFX11-NEXT: ds_load_u16 v3, v0 offset:4
; GFX10-LABEL: load_lds_v3i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
; GFX11-LABEL: load_lds_v3i32_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
; GFX10-LABEL: load_lds_v3i32_align8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
; GFX11-LABEL: load_lds_v3i32_align8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
; GFX10-LABEL: load_lds_v3i32_align16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b96 v[0:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v3i32_align16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b96 v[0:2], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v4i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX11-LABEL: load_lds_v4i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b128 v[0:3], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v3i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
; GFX11-LABEL: load_lds_v3i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b96 v[0:2], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: store_lds_v4i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
; GFX10-NEXT: ds_write2_b32 v0, v3, v4 offset0:2 offset1:3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-LABEL: store_lds_v4i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_store_b128 v0, v[1:4]
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: store_lds_v3i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
; GFX10-NEXT: ds_write_b32 v0, v3 offset:8
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-LABEL: store_lds_v3i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_store_b96 v0, v[1:3]
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_lshr_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX10PLUS-LABEL: v_lshr_i8_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 7, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_lshr_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX10PLUS-LABEL: v_lshr_i24_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 7, 17
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr i24 %value, 7
; GFX10PLUS-LABEL: v_lshr_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr i32 %value, %amount
; GFX10PLUS-LABEL: v_lshr_i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr i32 %value, 31
; GFX10PLUS-LABEL: v_lshr_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v2, v0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_lshr_v2i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 31, v0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, 31, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_lshr_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v3, v0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v4, v1
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v2, v5, v2
; GFX10PLUS-LABEL: v_lshr_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v4, v0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v5, v1
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v2, v6, v2
; GFX10PLUS-LABEL: v_lshr_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, v5, v0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, v6, v1
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v2, v7, v2
; GFX10-LABEL: v_lshr_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_lshrrev_b32_e32 v0, v16, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v1, v17, v1
; GFX11-LABEL: v_lshr_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_lshrrev_b32_e32 v0, v16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, v17, v1
; GFX10PLUS-LABEL: v_lshr_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr i16 %value, %amount
; GFX10PLUS-LABEL: v_lshr_i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 15, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr i16 %value, 15
; GFX10PLUS-LABEL: v_lshr_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr <2 x i16> %value, %amount
; GFX10PLUS-LABEL: v_lshr_v2i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, 15, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr <2 x i16> %value, <i16 15, i16 15>
; GFX10PLUS-LABEL: v_lshr_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, v2, v0
; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_lshr_v8i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v0, v4, v0
; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v1, v5, v1
; GFX10PLUS-NEXT: v_pk_lshrrev_b16 v2, v6, v2
; GFX10PLUS-LABEL: v_lshr_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr i64 %value, %amount
; GFX10PLUS-LABEL: v_lshr_i64_63:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 31, v1
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_lshr_i64_33:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v0, 1, v1
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_lshr_i64_32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_lshr_i64_32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = lshr i64 %value, 32
; GFX10PLUS-LABEL: v_lshr_i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = lshr i64 %value, 31
; GFX10PLUS-LABEL: v_lshr_v2i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], v4, v[0:1]
; GFX10PLUS-NEXT: v_lshrrev_b64 v[2:3], v6, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_lshr_v2i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT: v_lshrrev_b64 v[2:3], 31, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_lshr_i65:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v5, 0
; GFX10-NEXT: v_and_b32_e32 v4, 1, v2
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX11-LABEL: v_lshr_i65:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v4, 1, v2
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX10-LABEL: v_lshr_i65_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_and_b32_e32 v0, 1, v2
; GFX11-LABEL: v_lshr_i65_33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 1, v2
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v3
; GFX10PLUS-LABEL: v_mul_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = mul i16 %num, %den
; GFX10PLUS-LABEL: v_mul_i16_zeroext:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_mul_i16_signext:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_mul_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = mul i32 %num, %den
; GFX10PLUS-LABEL: v_mul_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_lo_u32 v0, v0, v2
; GFX10PLUS-NEXT: v_mul_lo_u32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: v_mov_b32_e32 v5, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0
; GFX11-LABEL: v_mul_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0
; GFX11-NEXT: v_mul_lo_u32 v3, v4, v3
; GFX10-LABEL: v_mul_i96:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v6, v0
; GFX10-NEXT: v_mov_b32_e32 v7, v1
; GFX10-NEXT: v_mul_lo_u32 v2, v2, v3
; GFX11-LABEL: v_mul_i96:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_mov_b32 v7, v1
; GFX11-NEXT: v_mul_lo_u32 v2, v2, v3
; GFX11-NEXT: v_mul_lo_u32 v5, v6, v5
; GFX10-LABEL: v_mul_i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v8, v0
; GFX10-NEXT: v_mov_b32_e32 v9, v1
; GFX10-NEXT: v_mov_b32_e32 v10, v2
; GFX11-LABEL: v_mul_i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1
; GFX11-NEXT: v_mov_b32_e32 v10, v2
; GFX11-NEXT: v_mul_lo_u32 v3, v3, v4
; GFX10-LABEL: v_mul_i256:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v16, v0
; GFX10-NEXT: v_mov_b32_e32 v17, v1
; GFX10-NEXT: v_mul_lo_u32 v27, v6, v9
; GFX11-LABEL: v_mul_i256:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
; GFX11-NEXT: v_mul_lo_u32 v7, v7, v8
; GFX11-NEXT: v_mul_lo_u32 v27, v6, v9
; GFX10-LABEL: v_mul_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul <2 x i16> %a, %b
; GFX10-LABEL: v_mul_v2i16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10-LABEL: v_mul_v2i16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <2 x half> %b
; GFX10-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
; GFX10PLUS-LABEL: v_orn2_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_orn2_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10PLUS-LABEL: v_orn2_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_orn2_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_orn2_v3i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10PLUS-LABEL: v_orn2_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10PLUS-LABEL: v_saddsat_i7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1
; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_saddsat_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp
; GFX10-LABEL: v_saddsat_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-LABEL: v_saddsat_v2i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-LABEL: v_saddsat_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX11-LABEL: v_saddsat_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX10PLUS-LABEL: v_saddsat_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_saddsat_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs)
; GFX10PLUS-LABEL: v_saddsat_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_saddsat_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_saddsat_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_saddsat_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v5 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v6 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v2, v2, v7 clamp
; GFX10-LABEL: v_saddsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_add_nc_i32 v0, v0, v16 clamp
; GFX10-NEXT: v_add_nc_i32 v1, v1, v17 clamp
; GFX11-LABEL: v_saddsat_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_add_nc_i32 v0, v0, v16 clamp
; GFX11-NEXT: v_add_nc_i32 v1, v1, v17 clamp
; GFX10PLUS-LABEL: v_saddsat_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
; GFX10PLUS-LABEL: v_saddsat_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10PLUS-LABEL: v_saddsat_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_saddsat_v6i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_pk_add_i16 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_saddsat_v8i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_pk_add_i16 v2, v2, v6 clamp
; GFX10-LABEL: v_saddsat_i48:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3]
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX11-LABEL: v_saddsat_i48:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3]
; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX10-LABEL: v_saddsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3]
; GFX11-LABEL: v_saddsat_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: v_cmp_gt_i64_e64 s0, 0, v[2:3]
; GFX10-LABEL: v_saddsat_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v0, v4
; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo
; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, v2, v6
; GFX11-LABEL: v_saddsat_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v4
; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo
; GFX11-NEXT: v_add_co_u32 v10, vcc_lo, v2, v6
; GFX10-LABEL: v_saddsat_v2i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v0, v8
; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v9, vcc_lo
; GFX10-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, v2, v10, vcc_lo
; GFX11-LABEL: v_saddsat_v2i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v8
; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v9, vcc_lo
; GFX11-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, v2, v10, vcc_lo
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN,GFX89 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN,GFX89 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN %s
; Test vector signed bitfield extract.
define signext i8 @v_ashr_i8_i32(i32 %value) {
-; GFX89-LABEL: v_ashr_i8_i32:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_bfe_i32 v0, v0, 4, 8
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_ashr_i8_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_bfe_i32 v0, v0, 4, 8
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_ashr_i8_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_i32 v0, v0, 4, 8
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = ashr i32 %value, 4
%2 = trunc i32 %1 to i8
ret i8 %2
}
define signext i16 @v_ashr_i16_i32(i32 %value) {
-; GFX89-LABEL: v_ashr_i16_i32:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_bfe_i32 v0, v0, 9, 16
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_ashr_i16_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_bfe_i32 v0, v0, 9, 16
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_ashr_i16_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_i32 v0, v0, 9, 16
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = ashr i32 %value, 9
%2 = trunc i32 %1 to i16
ret i16 %2
}
define signext i8 @v_lshr_i8_i32(i32 %value) {
-; GFX89-LABEL: v_lshr_i8_i32:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_bfe_i32 v0, v0, 4, 8
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_lshr_i8_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_bfe_i32 v0, v0, 4, 8
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_lshr_i8_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_i32 v0, v0, 4, 8
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = lshr i32 %value, 4
%2 = trunc i32 %1 to i8
ret i8 %2
}
define signext i16 @v_lshr_i16_i32(i32 %value) {
-; GFX89-LABEL: v_lshr_i16_i32:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_bfe_i32 v0, v0, 9, 16
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_lshr_i16_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_bfe_i32 v0, v0, 9, 16
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_lshr_i16_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_i32 v0, v0, 9, 16
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = lshr i32 %value, 9
%2 = trunc i32 %1 to i16
ret i16 %2
; Test vector bitfield extract for 64-bits.
define i64 @v_ashr_i64(i64 %value) {
-; GFX89-LABEL: v_ashr_i64:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1]
-; GFX89-NEXT: v_bfe_i32 v0, v0, 0, 4
-; GFX89-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_ashr_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1]
-; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 4
-; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_ashr_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1]
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4
+; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = ashr i64 %value, 10
%2 = shl i64 %1, 60
%3 = ashr i64 %2, 60
}
define i64 @v_lshr_i64(i64 %value) {
-; GFX89-LABEL: v_lshr_i64:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1]
-; GFX89-NEXT: v_bfe_i32 v0, v0, 0, 4
-; GFX89-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_lshr_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1]
-; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 4
-; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_lshr_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1]
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4
+; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = lshr i64 %value, 10
%2 = shl i64 %1, 60
%3 = ashr i64 %2, 60
; GFX10PLUS-LABEL: v_sext_inreg_i8_4:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 4
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i8 %value, 4
; GFX10PLUS-LABEL: v_sext_inreg_i8_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i8 %value, 7
; GFX10PLUS-LABEL: v_sext_inreg_i24_12:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 12, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_i24_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 17
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i24 %value, 7
; GFX10PLUS-LABEL: v_sext_inreg_i32_3:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 29
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %value, 3
; GFX10PLUS-LABEL: v_sext_inreg_i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %value, 31
; GFX10PLUS-LABEL: v_sext_inreg_v2i32_14:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 18
; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 18
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_v2i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_v3i32_16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX10PLUS-LABEL: v_sext_inreg_v4i32_6:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 26
; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 26
; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 26
; GFX10PLUS-LABEL: v_sext_inreg_v5i32_30:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2
; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2
; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2
; GFX10PLUS-LABEL: v_sext_inreg_v16i32_27:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 5
; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 5
; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 5
; GFX10PLUS-LABEL: v_sext_inreg_i16_4:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i16 %value, 4
; GFX10PLUS-LABEL: v_sext_inreg_i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i16 %value, 15
; GFX10PLUS-LABEL: v_sext_inreg_v2i16_8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_v2i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_v4i16_3:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1]
; GFX10PLUS-LABEL: v_sext_inreg_v8i16_11:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1]
; GFX10PLUS-LABEL: v_sext_inreg_i64_23:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 9
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i64 %value, 23
; GFX10PLUS-LABEL: v_sext_inreg_i64_40:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_i64_63:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_i64_33:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 31
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_i64_32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%shl = shl i64 %value, 31
; GFX10PLUS-LABEL: v_sext_inreg_v2i64_16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 16
; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 16
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_v2i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1
; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_sext_inreg_i65_22:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1
; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
; GFX10PLUS-LABEL: v_sext_inreg_i65_33:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1
; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3
; GFX10-LABEL: v_shl_i64_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-LABEL: v_shl_i64_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3fffffff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_shl_i64_sext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-LABEL: v_shl_i64_sext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x1fffffff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_shl_i64_zext_i32_overflow:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX11-LABEL: v_shl_i64_zext_i32_overflow:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x7fffffff, v0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_shl_i64_sext_i32_overflow:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX10-LABEL: v_shl_v2i64_zext_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
; GFX10-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1
; GFX10-NEXT: v_mov_b32_e32 v3, 0
; GFX11-LABEL: v_shl_v2i64_zext_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
; GFX11-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1
; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1
; GFX10-LABEL: v_shl_v2i64_sext_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
; GFX10-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1
; GFX10-NEXT: v_mov_b32_e32 v3, 0
; GFX11-LABEL: v_shl_v2i64_sext_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
; GFX11-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1
; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1
; GFX10PLUS-LABEL: v_shl_i32_zext_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x3fff, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 2, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-LABEL: v_shl_v2i32_zext_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 2, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX10PLUS-LABEL: v_shl_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_shl_i8_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 7, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i8 %value, 7
; GFX10PLUS-LABEL: v_shl_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_shl_i24_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i24 %value, 7
; GFX10PLUS-LABEL: v_shl_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i32 %value, %amount
; GFX10PLUS-LABEL: v_shl_i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i32 %value, 31
; GFX10PLUS-LABEL: v_shl_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_shl_v2i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 31, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_shl_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v3, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v4, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v5, v2
; GFX10PLUS-LABEL: v_shl_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v4, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v5, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v6, v2
; GFX10PLUS-LABEL: v_shl_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v5, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v6, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v7, v2
; GFX10-LABEL: v_shl_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_lshlrev_b32_e32 v0, v16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, v17, v1
; GFX11-LABEL: v_shl_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_lshlrev_b32_e32 v0, v16, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v1, v17, v1
; GFX10PLUS-LABEL: v_shl_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i16 %value, %amount
; GFX10PLUS-LABEL: v_shl_i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 15, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i16 %value, 15
; GFX10PLUS-LABEL: v_shl_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl <2 x i16> %value, %amount
; GFX10PLUS-LABEL: v_shl_v2i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl <2 x i16> %value, <i16 15, i16 15>
; GFX10PLUS-LABEL: v_shl_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v2, v0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_shl_v8i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v4, v0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v5, v1
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, v6, v2
; GFX10PLUS-LABEL: v_shl_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i64 %value, %amount
; GFX10-LABEL: v_shl_i64_63:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 31, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_shl_i64_63:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 31, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = shl i64 %value, 63
; GFX10-LABEL: v_shl_i64_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_shl_i64_33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = shl i64 %value, 33
; GFX10-LABEL: v_shl_i64_32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_shl_i64_32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = shl i64 %value, 32
; GFX10PLUS-LABEL: v_shl_i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = shl i64 %value, 31
; GFX10PLUS-LABEL: v_shl_v2i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_shl_v2i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_shl_i65:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX10-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
; GFX11-LABEL: v_shl_i65:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX11-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
; GFX10-LABEL: v_shl_i65_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 1, v0
; GFX10-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX11-LABEL: v_shl_i65_33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 1, v0
; GFX11-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v4
; GFX10-LABEL: v_shl1_add_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 1, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %src0, 1
; GFX10-LABEL: v_shl2_add_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %src0, 2
; GFX10-LABEL: v_shl3_add_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %src0, 3
; GFX10-LABEL: v_shl4_add_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 4, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %src0, 4
; GFX10-LABEL: v_shl5_add_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 5, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %src0, 5
; GFX10-LABEL: test_min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
; GFX10-LABEL: test_min_K1max_ValK0__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
; GFX10-LABEL: test_min_K1max_K0Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
; GFX10-LABEL: test_max_min_ValK1_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
; GFX10-LABEL: test_max_min_K1Val_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
; GFX10-LABEL: test_max_K0min_ValK1__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
; GFX10-LABEL: test_max_K0min_K1Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_non_inline_constant_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 0x41
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
; GFX10PLUS-LABEL: v_ssubsat_i7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1
; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_ssubsat_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
; GFX10-LABEL: v_ssubsat_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-LABEL: v_ssubsat_v2i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-LABEL: v_ssubsat_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX11-LABEL: v_ssubsat_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX10PLUS-LABEL: v_ssubsat_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_ssubsat_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
; GFX10PLUS-LABEL: v_ssubsat_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ssubsat_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_ssubsat_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_ssubsat_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v5 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v6 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v7 clamp
; GFX10-LABEL: v_ssubsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
; GFX11-LABEL: v_ssubsat_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
; GFX10PLUS-LABEL: v_ssubsat_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
; GFX10PLUS-LABEL: v_ssubsat_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10PLUS-LABEL: v_ssubsat_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ssubsat_v6i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_pk_sub_i16 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_ssubsat_v8i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_pk_sub_i16 v2, v2, v6 clamp
; GFX10-LABEL: v_ssubsat_i48:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3]
; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX11-LABEL: v_ssubsat_i48:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3]
; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX10-LABEL: v_ssubsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
; GFX11-LABEL: v_ssubsat_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3]
; GFX10-LABEL: v_ssubsat_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v0, v4
; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo
; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v2, v6
; GFX11-LABEL: v_ssubsat_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_co_u32 v8, vcc_lo, v0, v4
; GFX11-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo
; GFX11-NEXT: v_sub_co_u32 v10, vcc_lo, v2, v6
; GFX10-LABEL: v_ssubsat_v2i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_co_u32 v16, vcc_lo, v0, v8
; GFX10-NEXT: v_sub_co_ci_u32_e32 v17, vcc_lo, v1, v9, vcc_lo
; GFX10-NEXT: v_sub_co_ci_u32_e32 v18, vcc_lo, v2, v10, vcc_lo
; GFX11-LABEL: v_ssubsat_v2i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_co_u32 v16, vcc_lo, v0, v8
; GFX11-NEXT: v_sub_co_ci_u32_e32 v17, vcc_lo, v1, v9, vcc_lo
; GFX11-NEXT: v_sub_co_ci_u32_e32 v18, vcc_lo, v2, v10, vcc_lo
; GFX10PLUS-LABEL: v_uaddsat_i7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1
; GFX10PLUS-NEXT: v_add_nc_u16 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_uaddsat_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10PLUS-NEXT: v_add_nc_u16 v0, v0, v1 clamp
; GFX10-LABEL: v_uaddsat_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-LABEL: v_uaddsat_v2i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-LABEL: v_uaddsat_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX11-LABEL: v_uaddsat_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX10PLUS-LABEL: v_uaddsat_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_uaddsat_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
; GFX10PLUS-LABEL: v_uaddsat_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_uaddsat_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_uaddsat_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_uaddsat_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v0, v0, v5 clamp
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v1, v1, v6 clamp
; GFX10PLUS-NEXT: v_add_nc_u32_e64 v2, v2, v7 clamp
; GFX10-LABEL: v_uaddsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v17 clamp
; GFX11-LABEL: v_uaddsat_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_add_nc_u32_e64 v0, v0, v16 clamp
; GFX11-NEXT: v_add_nc_u32_e64 v1, v1, v17 clamp
; GFX10PLUS-LABEL: v_uaddsat_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_u16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
; GFX10PLUS-LABEL: v_uaddsat_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10PLUS-LABEL: v_uaddsat_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_add_u16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_uaddsat_v6i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_pk_add_u16 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_pk_add_u16 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_uaddsat_v8i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_u16 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_pk_add_u16 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_pk_add_u16 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_uaddsat_i48:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3]
; GFX10PLUS-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10PLUS-LABEL: v_uaddsat_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10PLUS-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
; GFX10-LABEL: v_uaddsat_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_add_co_u32 v2, s4, v2, v6
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo
; GFX11-LABEL: v_uaddsat_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX11-NEXT: v_add_co_u32 v2, s0, v2, v6
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo
; GFX10-LABEL: v_uaddsat_v2i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v8
; GFX10-NEXT: v_add_co_u32 v4, s4, v4, v12
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo
; GFX11-LABEL: v_uaddsat_v2i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v8
; GFX11-NEXT: v_add_co_u32 v4, s0, v4, v12
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN,GFX89 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN,GFX89 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN %s
; Test vector bitfield extract.
define i32 @v_srl_mask_i32(i32 %value) {
-; GFX89-LABEL: v_srl_mask_i32:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_bfe_u32 v0, v0, 8, 5
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_srl_mask_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_srl_mask_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v0, 8, 5
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = lshr i32 %value, 8
%2 = and i32 %1, 31
ret i32 %2
; Test vector bitfield extract.
define i32 @v_mask_srl_i32(i32 %value) {
-; GFX89-LABEL: v_mask_srl_i32:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_bfe_u32 v0, v0, 8, 5
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_mask_srl_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 5
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mask_srl_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_u32 v0, v0, 8, 5
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = and i32 %value, 7936 ; 31 << 8
%2 = lshr i32 %1, 8
ret i32 %2
; Test vector bitfield extract for 64-bits.
define i64 @v_srl_mask_i64(i64 %value) {
-; GFX89-LABEL: v_srl_mask_i64:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
-; GFX89-NEXT: v_mov_b32_e32 v1, 0
-; GFX89-NEXT: v_bfe_u32 v0, v0, 0, 10
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_srl_mask_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
-; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_srl_mask_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_bfe_u32 v0, v0, 0, 10
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = lshr i64 %value, 25
%2 = and i64 %1, 1023
ret i64 %2
; TODO: No need for a 64-bit shift instruction when the extracted value is
; entirely contained within the upper or lower half.
define i64 @v_mask_srl_i64(i64 %value) {
-; GFX89-LABEL: v_mask_srl_i64:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
-; GFX89-NEXT: v_mov_b32_e32 v1, 0
-; GFX89-NEXT: v_bfe_u32 v0, v0, 0, 10
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_mask_srl_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
-; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mask_srl_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_bfe_u32 v0, v0, 0, 10
+; GCN-NEXT: s_setpc_b64 s[30:31]
%1 = and i64 %value, 34326183936 ; 1023 << 25
%2 = lshr i64 %1, 25
ret i64 %2
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
; GFX10-LABEL: test_min_K1max_ValK0__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
; GFX10-LABEL: test_min_K1max_K0Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
; GFX10-LABEL: test_max_K0min_ValK1__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
; GFX10-LABEL: test_max_K0min_K1Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_non_inline_constant_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 0x41
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
; GFX10PLUS-LABEL: v_usubsat_i7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 9, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 9, v1
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_usubsat_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp
; GFX10-LABEL: v_usubsat_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-LABEL: v_usubsat_v2i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-LABEL: v_usubsat_v4i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX11-LABEL: v_usubsat_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX10PLUS-LABEL: v_usubsat_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_usubsat_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs)
; GFX10PLUS-LABEL: v_usubsat_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_usubsat_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_usubsat_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_usubsat_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v5 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v6 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v7 clamp
; GFX10-LABEL: v_usubsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp
; GFX11-LABEL: v_usubsat_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp
; GFX11-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp
; GFX10PLUS-LABEL: v_usubsat_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs)
; GFX10PLUS-LABEL: v_usubsat_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10PLUS-LABEL: v_usubsat_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_usubsat_v6i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_pk_sub_u16 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_usubsat_v8i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_pk_sub_u16 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_usubsat_i48:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 16, v[2:3]
; GFX10PLUS-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX10PLUS-LABEL: v_usubsat_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX10PLUS-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc_lo
; GFX10-LABEL: v_usubsat_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_sub_co_u32 v2, s4, v2, v6
; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo
; GFX11-LABEL: v_usubsat_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4
; GFX11-NEXT: v_sub_co_u32 v2, s0, v2, v6
; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v5, vcc_lo
; GFX10-LABEL: v_usubsat_v2i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v8
; GFX10-NEXT: v_sub_co_u32 v4, s4, v4, v12
; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo
; GFX11-LABEL: v_usubsat_v2i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v8
; GFX11-NEXT: v_sub_co_u32 v4, s0, v4, v12
; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v9, vcc_lo
; GFX10PLUS-LABEL: check_v_bfe:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: vector_xnor_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: vector_xnor_i64_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-LABEL: vector_xor_na_b_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: zextload_global_i1_to_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-LABEL: zextload_global_i1_to_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-LABEL: zextload_global_i8_to_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: zextload_global_i8_to_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: zextload_global_i16_to_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: zextload_global_i16_to_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: zextload_global_i1_to_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: zextload_global_i1_to_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v0
; GFX10-LABEL: zextload_global_i8_to_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: zextload_global_i8_to_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: zextload_global_i16_to_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: zextload_global_i16_to_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: zextload_global_i32_to_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: zextload_global_i32_to_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: zextload_global_i32_to_i96:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX11-LABEL: zextload_global_i32_to_i96:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: zextload_global_i32_to_i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX11-LABEL: zextload_global_i32_to_i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: v_mov_b32_e32 v3, 0
; GFX10-LABEL: test1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: test1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
tail call void @llvm.amdgcn.endpgm()
unreachable
; GFX1100-LABEL: syncscope_system:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: flat_load_b32 v3, v[0:1]
; GFX1100-NEXT: s_mov_b32 s0, 0
; GFX1100-NEXT: .LBB0_1: ; %atomicrmw.start
; GFX1100-LABEL: no_unsafe:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: flat_load_b32 v3, v[0:1]
; GFX1100-NEXT: s_mov_b32 s0, 0
; GFX1100-NEXT: .LBB3_1: ; %atomicrmw.start
; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX10-BACKOFF: ; %bb.0:
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
; GFX10-BACKOFF-NEXT: s_barrier
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0
; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX11-BACKOFF: ; %bb.0:
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT: s_barrier
; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT: s_setpc_b64 s[30:31]
%load = load i32, ptr %in
call void @llvm.amdgcn.s.barrier()
; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX10-BACKOFF: ; %bb.0:
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1]
; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT: buffer_gl0_inv
; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0
; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX11-BACKOFF: ; %bb.0:
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT: buffer_gl0_inv
; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT: s_setpc_b64 s[30:31]
%load = load i32, ptr %in
fence syncscope("workgroup") release
; GFX10-LABEL: test_load_store:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_short v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load bfloat, ptr addrspace(1) %in
store bfloat %val, ptr addrspace(1) %out
; GFX10-LABEL: test_load_store_f32_to_bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_short_d16_hi v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load float, ptr addrspace(1) %in
%val.bf16 = fptrunc float %val to bfloat
; GFX10-LABEL: test_load_store_f64_to_bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX10-NEXT: global_store_short_d16_hi v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load double, ptr addrspace(1) %in
%val.bf16 = fptrunc double %val to bfloat
; GFX10-LABEL: test_load_store_bf16_to_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: global_load_short_d16_hi v4, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v[2:3], v4, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load bfloat, ptr addrspace(1) %in
%val.f32 = fpext bfloat %val to float
; GFX10-LABEL: test_load_store_bf16_to_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: global_load_short_d16_hi v4, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v4
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load bfloat, ptr addrspace(1) %in
%val.f64 = fpext bfloat %val to double
; GFX10-LABEL: test_load_store_v2bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x bfloat>, ptr addrspace(1) %in
store <2 x bfloat> %val, ptr addrspace(1) %out
; GFX10-LABEL: test_load_store_v4bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load <4 x bfloat>, ptr addrspace(1) %in
store <4 x bfloat> %val, ptr addrspace(1) %out
; GFX10-LABEL: test_load_store_v8bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load <8 x bfloat>, ptr addrspace(1) %in
store <8 x bfloat> %val, ptr addrspace(1) %out
; GFX10-LABEL: test_load_store_v16bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load <16 x bfloat>, ptr addrspace(1) %in
store <16 x bfloat> %val, ptr addrspace(1) %out
; GFX10-LABEL: test_arg_store:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_short_d16_hi v[1:2], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store bfloat %in, ptr addrspace(1) %out
ret void
; GFX10-LABEL: test_arg_store_v2bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[1:2], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store <2 x bfloat> %in, ptr addrspace(1) %out
ret void
; GFX10-LABEL: test_arg_store_v3bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_short v[2:3], v1, off offset:4
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store <3 x bfloat> %in, <3 x bfloat> addrspace(1) * %out
ret void
; GFX10-LABEL: test_arg_store_v4bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store <4 x bfloat> %in, ptr addrspace(1) %out
ret void
; GFX10-LABEL: test_arg_store_v8bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store <8 x bfloat> %in, ptr addrspace(1) %out
ret void
; GFX10-LABEL: test_arg_store_v16bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dwordx4 v[8:9], v[4:7], off offset:16
; GFX10-NEXT: global_store_dwordx4 v[8:9], v[0:3], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store <16 x bfloat> %in, ptr addrspace(1) %out
ret void
; GFX10-LABEL: test_inreg_arg_store:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, s4
; GFX10-NEXT: global_store_short_d16_hi v[0:1], v2, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store bfloat %in, ptr addrspace(1) %out
ret void
; GFX10-LABEL: test_byval:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store bfloat %val, ptr addrspace(5) %bv
%retval = load bfloat, ptr addrspace(5) %bv
; GFX10-LABEL: test_sret:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
store bfloat %val, ptr addrspace(5) %sret
ret void
; GFX10-LABEL: test_bitcast_from_bfloat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_short v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load bfloat, ptr addrspace(1) %in
%val_int = bitcast bfloat %val to i16
; GFX10-LABEL: test_bitcast_to_bfloat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v2, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = load i16, ptr addrspace(1) %in
%val_fp = bitcast i16 %val to bfloat
; GFX10-LABEL: test_ret:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
ret bfloat %in
; GFX10-LABEL: test_ret_v2bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
ret <2 x bfloat> %in
; GFX10-LABEL: test_ret_v3bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v2
; GFX10-LABEL: test_ret_v4bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
ret <4 x bfloat> %in
; GFX10-LABEL: test_ret_v8bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
ret <8 x bfloat> %in
; GFX10-LABEL: test_ret_v16bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
ret <16 x bfloat> %in
; GFX10-LABEL: test_call:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-LABEL: test_call_v2bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-LABEL: test_call_v3bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-LABEL: test_call_v4bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-LABEL: test_call_v8bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-LABEL: test_call_v16bf16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-LABEL: test_alloca_load_store_ret:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-LABEL: test_overflow_stack:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:116
; GFX10-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen offset:128
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
%ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
; GFX10-LABEL: v_bfi_def_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_def_i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: v_bfi_sha256_ma:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: v_bfi_sha256_ma:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bitselect_v2i32_pat1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX10-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bitselect_i64_pat_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bitselect_i64_pat_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_bfi_sha256_ma_i64:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
; GFX10-NEXT: v_bfi_b32 v0, v0, v4, v2
; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v4, v2
; GFX11-FLAT-LABEL: missing_truncate_promote_bitreverse:
; GFX11-FLAT: ; %bb.0: ; %bb
; GFX11-FLAT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLAT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLAT-NEXT: v_bfrev_b32_e32 v0, v0
; GFX11-FLAT-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FLAT-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-GISEL-LABEL: missing_truncate_promote_bitreverse:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-LABEL: missing_truncate_promote_bswap:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: v_bswap_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bswap = call i16 @llvm.bswap.i16(i16 %src)
; GFX11-LABEL: v_bswap_i16_zext_to_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bswap = call i16 @llvm.bswap.i16(i16 %src)
; GFX11-LABEL: v_bswap_i16_sext_to_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-LABEL: v_bswap_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0x2030001
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src)
; GFX11-LABEL: v_bswap_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0x2030001
; GFX11-NEXT: v_perm_b32 v1, 0, v1, 0x2030001
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_bswap_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0x2030001
; GFX11-NEXT: v_perm_b32 v1, 0, v1, 0x2030001
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_bswap_i48:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v2, 0, v0, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, v1, 0x10203
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; ISA-LABEL: f:
; ISA: ; %bb.0: ; %bb
; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; ISA-NEXT: s_waitcnt_vscnt null, 0x0
; ISA-NEXT: s_mov_b64 s[4:5], 0
; ISA-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v0
; ISA-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; ISA-NEXT: s_or_b32 exec_lo, exec_lo, s4
; ISA-NEXT: flat_store_dword v[1:2], v6
; ISA-NEXT: s_waitcnt lgkmcnt(0)
-; ISA-NEXT: s_waitcnt_vscnt null, 0x0
; ISA-NEXT: s_setpc_b64 s[30:31]
; MIR-LABEL: name: f
; MIR: bb.0.bb:
; GFX11-LABEL: tail_call_byval_align16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
; GFX11-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v33, off, s32
; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:4
; GFX11-LABEL: stack_12xv3i32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-LABEL: stack_12xv3f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-LABEL: stack_8xv5i32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-LABEL: stack_8xv5f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,VI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11 %s
; Make sure we don't crash or assert on spir_kernel calling convention.
}
define fastcc float @fastcc(float %arg0) #0 {
-; SIVI-LABEL: fastcc:
-; SIVI: ; %bb.0:
-; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SIVI-NEXT: v_add_f32_e32 v0, 4.0, v0
-; SIVI-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: fastcc:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_add_f32_e32 v0, 4.0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: fastcc:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_add_f32_e32 v0, 4.0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%add = fadd float %arg0, 4.0
ret float %add
}
define coldcc float @coldcc(float %arg0) #0 {
-; SIVI-LABEL: coldcc:
-; SIVI: ; %bb.0:
-; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SIVI-NEXT: v_add_f32_e32 v0, 4.0, v0
-; SIVI-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: coldcc:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_add_f32_e32 v0, 4.0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: coldcc:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_add_f32_e32 v0, 4.0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%add = fadd float %arg0, 4.0
ret float %add
}
; GFX10-LABEL: test_sinkable_flat_small_offset_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i64 999999
; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i64 999999
; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i64 999999
; GFX10-LABEL: test_sink_flat_small_max_flat_offset:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i32 1024
; GFX10-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:636
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i64 99999
; GFX10-LABEL: test_sinkable_flat_reg_offset:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v6, -1, 0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10-NEXT: v_mov_b32_e32 v6, 0
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v6
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i32 1024
; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private:
; GFX10_DEFAULT: ; %bb.0: ; %bb
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_DEFAULT-NEXT: s_clause 0x1
; GFX10_DEFAULT-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2
; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], 0
; FLATSCR_GFX10-LABEL: chain_hi_to_lo_private:
; FLATSCR_GFX10: ; %bb.0: ; %bb
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FLATSCR_GFX10-NEXT: s_mov_b32 s0, 2
; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, off, s0
; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX11-LABEL: chain_hi_to_lo_private:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, 2
; GFX11-NEXT: scratch_load_u16 v0, off, s0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private_different_bases:
; GFX10_DEFAULT: ; %bb.0: ; %bb
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_DEFAULT-NEXT: s_clause 0x1
; GFX10_DEFAULT-NEXT: buffer_load_ushort v0, v0, s[0:3], 0 offen
; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v0, v1, s[0:3], 0 offen
; FLATSCR_GFX10-LABEL: chain_hi_to_lo_private_different_bases:
; FLATSCR_GFX10: ; %bb.0: ; %bb
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, v0, off
; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v0, v1, off
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: chain_hi_to_lo_private_different_bases:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_u16 v0, v0, off
; GFX11-NEXT: scratch_load_d16_hi_b16 v0, v1, off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10_DEFAULT-LABEL: chain_hi_to_lo_arithmatic:
; GFX10_DEFAULT: ; %bb.0: ; %bb
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_DEFAULT-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0)
; FLATSCR_GFX10-LABEL: chain_hi_to_lo_arithmatic:
; FLATSCR_GFX10: ; %bb.0: ; %bb
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FLATSCR_GFX10-NEXT: v_add_f16_e32 v1, 1.0, v1
; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, v0, off
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: chain_hi_to_lo_arithmatic:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1
; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: chain_hi_to_lo_group:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: ds_read_u16 v0, v1 offset:2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-LABEL: chain_hi_to_lo_group:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: ds_load_u16 v0, v1 offset:2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-LABEL: chain_hi_to_lo_group_different_bases:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u16 v0, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ds_read_u16_d16_hi v0, v1
; GFX11-LABEL: chain_hi_to_lo_group_different_bases:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_u16_d16_hi v0, v1
; GFX10-LABEL: chain_hi_to_lo_global:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 2
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX11-LABEL: chain_hi_to_lo_global:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 2
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
; GFX10-LABEL: chain_hi_to_lo_global_different_bases:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: global_load_short_d16_hi v0, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: chain_hi_to_lo_global_different_bases:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
; GFX11-NEXT: global_load_d16_hi_b16 v0, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: chain_hi_to_lo_flat:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 2
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: flat_load_ushort v0, v[0:1]
; GFX11-LABEL: chain_hi_to_lo_flat:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 2
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: flat_load_u16 v0, v[0:1]
; GFX10-LABEL: chain_hi_to_lo_flat_different_bases:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: flat_load_ushort v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_load_short_d16_hi v0, v[2:3]
; GFX11-LABEL: chain_hi_to_lo_flat_different_bases:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: flat_load_u16 v0, v[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3]
; GFX10-LABEL: chain_hi_to_lo_group_other_dep:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u16_d16_hi v1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
; GFX11-LABEL: chain_hi_to_lo_group_other_dep:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16_d16_hi v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
; GFX10-LABEL: chain_hi_to_lo_group_other_dep_multi_chain:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u16 v1, v0 offset:2
; GFX10-NEXT: ds_read_u16_d16_hi v0, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-LABEL: chain_hi_to_lo_group_other_dep_multi_chain:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16 v1, v0 offset:2
; GFX11-NEXT: ds_load_u16_d16_hi v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private_other_dep:
; GFX10_DEFAULT: ; %bb.0: ; %bb
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_DEFAULT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0)
; GFX10_DEFAULT-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
; FLATSCR_GFX10-LABEL: chain_hi_to_lo_private_other_dep:
; FLATSCR_GFX10: ; %bb.0: ; %bb
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, v0, off
; FLATSCR_GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v0
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: chain_hi_to_lo_private_other_dep:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off
; GFX11-NEXT: v_add_nc_u32_e32 v2, 2, v0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: chain_hi_to_lo_global_other_dep:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v2, v[0:1], off offset:2 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_short_d16_hi v0, v[0:1], off glc dlc
; GFX11-LABEL: chain_hi_to_lo_global_other_dep:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u16 v2, v[0:1], off offset:2 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_load_d16_hi_b16 v0, v[0:1], off glc dlc
; GFX10-LABEL: chain_hi_to_lo_flat_other_dep:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, 2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ushort v2, v[2:3] glc dlc
; GFX11-LABEL: chain_hi_to_lo_flat_other_dep:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc
; GFX10-LABEL: chain_hi_to_lo_group_may_alias_store:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX10-NEXT: ds_read_u16 v3, v0
; GFX10-NEXT: ds_write_b16 v1, v2
; GFX11-LABEL: chain_hi_to_lo_group_may_alias_store:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX11-NEXT: ds_load_u16 v3, v0
; GFX11-NEXT: ds_store_b16 v1, v2
; GFX11-LABEL: v_clamp_cvt_pkrtz_src_v2f16_denorm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e64 v0, v0, v1 clamp
; GFX11-NEXT: s_setpc_b64 s[30:31]
%add = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
; GFX1010-LABEL: combine_add_zext_xor:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
; GFX1010-NEXT: s_branch .LBB0_2
; GFX1010-NEXT: .LBB0_1: ; %bb9
; GFX1100-LABEL: combine_add_zext_xor:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-NEXT: s_branch .LBB0_2
; GFX1100-NEXT: .LBB0_1: ; %bb9
; GFX1010-LABEL: combine_sub_zext_xor:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
; GFX1010-NEXT: s_branch .LBB1_2
; GFX1010-NEXT: .LBB1_1: ; %bb9
; GFX1100-LABEL: combine_sub_zext_xor:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-NEXT: s_branch .LBB1_2
; GFX1100-NEXT: .LBB1_1: ; %bb9
; GFX1010-LABEL: combine_add_zext_or:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: s_mov_b32 s4, 0
; GFX1010-NEXT: s_branch .LBB2_2
; GFX1010-NEXT: .LBB2_1: ; %bb9
; GFX1100-LABEL: combine_add_zext_or:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: s_mov_b32 s0, 0
; GFX1100-NEXT: s_branch .LBB2_2
; GFX1100-NEXT: .LBB2_1: ; %bb9
; GFX1010-LABEL: combine_sub_zext_or:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: s_mov_b32 s4, 0
; GFX1010-NEXT: s_branch .LBB3_2
; GFX1010-NEXT: .LBB3_1: ; %bb9
; GFX1100-LABEL: combine_sub_zext_or:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: s_mov_b32 s0, 0
; GFX1100-NEXT: s_branch .LBB3_2
; GFX1100-NEXT: .LBB3_1: ; %bb9
; GFX1010-LABEL: combine_add_zext_and:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
; GFX1010-NEXT: s_branch .LBB4_2
; GFX1010-NEXT: .LBB4_1: ; %bb9
; GFX1100-LABEL: combine_add_zext_and:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-NEXT: s_branch .LBB4_2
; GFX1100-NEXT: .LBB4_1: ; %bb9
; GFX1010-LABEL: combine_sub_zext_and:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
; GFX1010-NEXT: s_branch .LBB5_2
; GFX1010-NEXT: .LBB5_1: ; %bb9
; GFX1100-LABEL: combine_sub_zext_and:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
; GFX1100-NEXT: s_branch .LBB5_2
; GFX1100-NEXT: .LBB5_1: ; %bb9
; GCN-LABEL: test:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_xor_saveexec_b32 s4, -1
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GCN-NEXT: s_mov_b32 exec_lo, s4
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
; %x = subgroup operation over all lanes.
; GFX10-LABEL: v_uitofp_i32_to_f32_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_i32_to_f32_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%masked = and i32 %arg0, 255
; GFX10-LABEL: v_sitofp_i32_to_f32_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sitofp_i32_to_f32_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%masked = and i32 %arg0, 255
; GFX10-LABEL: v_uitofp_to_f32_lshr7_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 7, v0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_to_f32_lshr7_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-LABEL: v_uitofp_to_f32_lshr8_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_to_f32_lshr8_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%lshr.8 = lshr i32 %arg0, 8
; GFX10-LABEL: v_uitofp_to_f32_multi_use_lshr8_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX10-NEXT: global_store_dword v[0:1], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uitofp_to_f32_multi_use_lshr8_mask255:
; GFX11-LABEL: v_uitofp_to_f32_multi_use_lshr8_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%lshr.8 = lshr i32 %arg0, 8
store i32 %lshr.8, ptr addrspace(1) undef
; GFX10-LABEL: v_uitofp_to_f32_lshr16_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_to_f32_lshr16_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%lshr.16 = lshr i32 %arg0, 16
; GFX10-LABEL: v_uitofp_to_f32_lshr24_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte3_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_to_f32_lshr24_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte3_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%lshr.16 = lshr i32 %arg0, 24
; GFX10-LABEL: v_uitofp_i8_to_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_i8_to_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cvt = uitofp i8 %arg0 to float
; GFX10-LABEL: v_uitofp_v2i8_to_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX11-LABEL: v_uitofp_v2i8_to_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX10-LABEL: v_uitofp_v3i8_to_v3f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, v0
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
; GFX11-LABEL: v_uitofp_v3i8_to_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, v0
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
; GFX10-LABEL: v_uitofp_v4i8_to_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v4, v0
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
; GFX11-LABEL: v_uitofp_v4i8_to_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v4, v0
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
; GFX10-LABEL: v_uitofp_unpack_i32_to_v4f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v4, v0
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
; GFX11-LABEL: v_uitofp_unpack_i32_to_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v4, v0
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
; GFX10-LABEL: v_uitofp_i32_to_f16_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_i32_to_f16_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-LABEL: v_sitofp_i32_to_f16_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_sitofp_i32_to_f16_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-LABEL: v_uitofp_to_f16_lshr8_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_to_f16_lshr8_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-LABEL: v_uitofp_to_f16_lshr16_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v0, v0
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_to_f16_lshr16_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-LABEL: v_uitofp_to_f16_lshr24_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_ubyte3_e32 v0, v0
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_to_f16_lshr24_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_ubyte3_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-LABEL: v_uitofp_i8_to_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f16_u16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_i8_to_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX10-LABEL: v_uitofp_i32_to_f64_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_i32_to_f64_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-LABEL: v_uitofp_to_f64_lshr8_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 8
; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_to_f64_lshr8_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfe_u32 v0, v0, 8, 8
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-LABEL: v_uitofp_to_f64_lshr16_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8
; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_to_f64_lshr16_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-LABEL: v_uitofp_to_f64_lshr24_mask255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_uitofp_to_f64_lshr24_mask255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX10-LABEL: v_uitofp_i8_to_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0xffff
; GFX10-NEXT: v_and_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX11-LABEL: v_uitofp_i8_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-LABEL: fmac_sequence_simple:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: v_fma_f32 v2, v2, v3, v4
; GCN-NEXT: v_fmac_f32_e32 v2, v0, v1
; GCN-NEXT: v_mov_b32_e32 v0, v2
; GCN-LABEL: fmac_sequence_innermost_fmul:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: v_mad_f32 v2, v2, v3, v6
; GCN-NEXT: v_fmac_f32_e32 v2, v0, v1
; GCN-NEXT: v_fmac_f32_e32 v2, v4, v5
; GCN-LABEL: fmac_sequence_innermost_fmul_swapped_operands:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: v_mad_f32 v2, v2, v3, v6
; GCN-NEXT: v_fmac_f32_e32 v2, v0, v1
; GCN-NEXT: v_fmac_f32_e32 v2, v4, v5
; GFX11-LABEL: divergent_vec_0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp = insertelement <2 x i16> undef, i16 0, i32 0
; GFX11-LABEL: divergent_vec_i16_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp = insertelement <2 x i16> undef, i16 %a, i32 0
; GFX11-LABEL: divergent_vec_f16_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp = insertelement <2 x half> undef, half %a, i32 0
; GFX11-LABEL: divergent_vec_i16_LL:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp = insertelement <2 x i16> undef, i16 %a, i32 0
; GFX11-LABEL: divergent_vec_i16_LH:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%shift = lshr i32 %b, 16
; GFX11-LABEL: divergent_vec_i16_HH:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x7060302
; GFX11-NEXT: s_setpc_b64 s[30:31]
%shift_a = lshr i32 %a, 16
; GFX11-LABEL: divergent_vec_f16_LL:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp = insertelement <2 x half> undef, half %a, i32 0
; GFX11-LABEL: build_vec_v2i16_undeflo_divergent:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16_d16 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: s_add_co_select_user:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[4:5], 0
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-LABEL: s_add_co_select_user:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], 0
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-LABEL: vec_8xi16_extract_4xi16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_cbranch_scc0 .LBB0_2
; GFX11-NEXT: ; %bb.1: ; %F
; GFX11-NEXT: global_load_b128 v[2:5], v[2:3], off glc dlc
; GFX11-LABEL: vec_8xi16_extract_4xi16_2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_cbranch_scc0 .LBB1_2
; GFX11-NEXT: ; %bb.1: ; %F
; GFX11-NEXT: global_load_b128 v[2:5], v[2:3], off glc dlc
; GFX11-LABEL: vec_8xf16_extract_4xf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: ; %bb.1: ; %F
; GFX11-NEXT: global_load_b128 v[2:5], v[2:3], off glc dlc
; GFX11-LABEL: vec_16xi16_extract_4xi16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_cbranch_scc0 .LBB3_2
; GFX11-NEXT: ; %bb.1: ; %F
; GFX11-NEXT: global_load_b128 v[4:7], v[2:3], off offset:16 glc dlc
; GFX11-LABEL: vec_16xi16_extract_4xi16_2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
; GFX11-NEXT: ; %bb.1: ; %F
; GFX11-NEXT: global_load_b128 v[4:7], v[2:3], off offset:16 glc dlc
; GFX11-LABEL: vec_16xf16_extract_4xf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
; GFX11-NEXT: ; %bb.1: ; %F
; GFX11-NEXT: global_load_b128 v[4:7], v[2:3], off offset:16 glc dlc
; GFX11-LABEL: large_vector:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshl_add_u32 v2, v1, 5, v0
; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v2 offset1:1
; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3
; GFX10-LABEL: global_load_2xi16_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_load_2xi16_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_load_2xi16_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_load_2xi16_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_load_2xi16_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_load_2xi16_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: private_load_2xi16_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-FLASTSCR-LABEL: private_load_2xi16_align2:
; GFX10-FLASTSCR: ; %bb.0:
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: private_load_2xi16_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, v0, off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-FLASTSCR-LABEL: private_load_2xi16_align2:
; GFX11-FLASTSCR: ; %bb.0:
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: private_store_2xi16_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-FLASTSCR-LABEL: private_store_2xi16_align2:
; GFX10-FLASTSCR: ; %bb.0:
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: private_store_2xi16_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX11-NEXT: scratch_store_b32 v1, v0, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FLASTSCR-LABEL: private_store_2xi16_align2:
; GFX11-FLASTSCR: ; %bb.0:
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
%gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1
store i16 1, ptr addrspace(5) %r, align 2
; GFX10-LABEL: private_load_2xi16_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-FLASTSCR-LABEL: private_load_2xi16_align1:
; GFX10-FLASTSCR: ; %bb.0:
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: private_load_2xi16_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, v0, off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-FLASTSCR-LABEL: private_load_2xi16_align1:
; GFX11-FLASTSCR: ; %bb.0:
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: private_store_2xi16_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-FLASTSCR-LABEL: private_store_2xi16_align1:
; GFX10-FLASTSCR: ; %bb.0:
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: private_store_2xi16_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX11-NEXT: scratch_store_b32 v1, v0, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FLASTSCR-LABEL: private_store_2xi16_align1:
; GFX11-FLASTSCR: ; %bb.0:
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
%gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1
store i16 1, ptr addrspace(5) %r, align 1
; GFX10-LABEL: private_load_2xi16_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-FLASTSCR-LABEL: private_load_2xi16_align4:
; GFX10-FLASTSCR: ; %bb.0:
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: scratch_load_dword v0, v0, off
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: private_load_2xi16_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, v0, off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-FLASTSCR-LABEL: private_load_2xi16_align4:
; GFX11-FLASTSCR: ; %bb.0:
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: scratch_load_b32 v0, v0, off
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0)
; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: private_store_2xi16_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-FLASTSCR-LABEL: private_store_2xi16_align4:
; GFX10-FLASTSCR: ; %bb.0:
; GFX10-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-FLASTSCR-NEXT: scratch_store_dword v1, v0, off
-; GFX10-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: private_store_2xi16_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX11-NEXT: scratch_store_b32 v1, v0, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FLASTSCR-LABEL: private_store_2xi16_align4:
; GFX11-FLASTSCR: ; %bb.0:
; GFX11-FLASTSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX11-FLASTSCR-NEXT: scratch_store_b32 v1, v0, off
-; GFX11-FLASTSCR-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-FLASTSCR-NEXT: s_setpc_b64 s[30:31]
%gep.r = getelementptr i16, ptr addrspace(5) %r, i64 1
store i16 1, ptr addrspace(5) %r, align 4
; GFX11-LABEL: v_test_canonicalize_build_vector_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-LABEL: v_test_canonicalize_var_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_canonicalize_var_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_canonicalize_reg_undef_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v0, 0
; GFX11-LABEL: v_test_canonicalize_undef_reg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX11-LABEL: v_test_canonicalize_undef_lo_imm_hi_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfrev_b32_e32 v0, 60
; GFX11-NEXT: s_setpc_b64 s[30:31]
%vec = insertelement <2 x half> undef, half 1.0, i32 1
; GFX11-LABEL: v_test_canonicalize_imm_lo_undef_hi_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3c00
; GFX11-NEXT: s_setpc_b64 s[30:31]
%vec = insertelement <2 x half> undef, half 1.0, i32 0
; GFX11-LABEL: v_test_canonicalize_undef_lo_k_hi_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfrev_b32_e32 v0, 50
; GFX11-NEXT: s_setpc_b64 s[30:31]
%vec = insertelement <2 x half> undef, half 16.0, i32 1
; GFX11-LABEL: v_test_canonicalize_k_lo_undef_hi_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4c00
; GFX11-NEXT: s_setpc_b64 s[30:31]
%vec = insertelement <2 x half> undef, half 16.0, i32 0
; GFX11-LABEL: v_test_canonicalize_reg_k_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v0, 2.0
; GFX11-LABEL: v_test_canonicalize_k_reg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, 2.0, v0
; GFX11-LABEL: v_test_canonicalize_reg_undef_undef_undef_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: v_mov_b32_e32 v1, 0x7e007e00
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: v_test_canonicalize_reg_reg_undef_undef_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: v_mov_b32_e32 v1, 0x7e007e00
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: v_test_canonicalize_reg_undef_reg_reg_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_test_canonicalize_var_v6f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-LABEL: v_test_canonicalize_var_v8f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-LABEL: v_test_canonicalize_var_v12f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-LABEL: v_test_canonicalize_var_v16f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-LABEL: v_test_canonicalize_var_v32f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-NEXT: v_pk_max_f16 v2, v2, v2
; GFX11-LABEL: v_test_canonicalize_var_v64f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_pk_max_f16 v0, v0, v0
; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
; GFX11-LABEL: v_test_canonicalize_v2f32_flush:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg)
; GFX11-LABEL: v_test_canonicalize_v3f32_flush:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_max_f32_e32 v2, v2, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_canonicalize_v4f32_flush:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_canonicalize_v8f32_flush:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
; GFX11-LABEL: v_test_canonicalize_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_canonicalize_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX11-LABEL: v_test_canonicalize_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX11-LABEL: v_copysign_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.copysign.f16(half %mag, half %sign)
; GFX11-LABEL: v_test_copysign_f16_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.copysign.f16(half %mag, half 0.0)
; GFX11-LABEL: v_test_copysign_f16_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.copysign.f16(half %mag, half 1.0)
; GFX11-LABEL: v_test_copysign_f16_10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.copysign.f16(half %mag, half 10.0)
; GFX11-LABEL: v_test_copysign_f16_neg1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.copysign.f16(half %mag, half -1.0)
; GFX11-LABEL: v_test_copysign_f16_neg10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.copysign.f16(half %mag, half -10.0)
; GFX11-LABEL: v_test_copysign_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.copysign.f32(float %mag, float %sign)
; GFX11-LABEL: v_test_copysign_f32_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.copysign.f32(float %mag, float 0.0)
; GFX11-LABEL: v_test_copysign_f32_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.copysign.f32(float %mag, float 1.0)
; GFX11-LABEL: v_test_copysign_f32_10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.copysign.f32(float %mag, float 10.0)
; GFX11-LABEL: v_test_copysign_f32_neg1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v0, 0x80000000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.copysign.f32(float %mag, float -1.0)
; GFX11-LABEL: v_test_copysign_f32_neg10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v0, 0x80000000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.copysign.f32(float %mag, float -10.0)
; GFX11-LABEL: v_test_copysign_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v2
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_copysign_v2f32_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_copysign_v2f32_neg1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v0, 0x80000000, v0
; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_copysign_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v3
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v4
; GFX11-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v5
; GFX11-LABEL: v_test_copysign_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v4
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v5
; GFX11-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v6
; GFX11-LABEL: v_test_copysign_v5f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v5
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v6
; GFX11-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v7
; GFX11-LABEL: v_test_copysign_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v21
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_copysign_f64_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.copysign.f64(double %mag, double 0.0)
; GFX11-LABEL: v_test_copysign_f64_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.copysign.f64(double %mag, double 1.0)
; GFX11-LABEL: v_test_copysign_f64_10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.copysign.f64(double %mag, double 10.0)
; GFX11-LABEL: v_test_copysign_f64_neg1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v11
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_copysign_f64_neg10:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v11
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_copysign_f64_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v20
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_test_copysign_f64_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_lshlrev_b32 v1, 16, v20
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v1
; GFX11-LABEL: v_test_copysign_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v7
; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v9
; GFX11-LABEL: v_test_copysign_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v9
; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v11
; GFX11-LABEL: v_test_copysign_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v11
; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v13
; GFX10-LABEL: zero_init_foo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s0, 0
; GFX10-NEXT: s_mov_b32 s1, s0
; GFX10-NEXT: s_mov_b32 s2, s0
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zero_init_foo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s1, s0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:32
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-PAL-LABEL: zero_init_foo:
; GFX10-PAL-LABEL: zero_init_foo:
; GFX10-PAL: ; %bb.0:
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: s_mov_b32 s0, 0
; GFX10-PAL-NEXT: s_mov_b32 s1, s0
; GFX10-PAL-NEXT: s_mov_b32 s2, s0
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-PAL-LABEL: zero_init_foo:
; GFX11-PAL: ; %bb.0:
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_mov_b32 s0, 0
; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-PAL-NEXT: s_mov_b32 s1, s0
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:32
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [32 x i16], align 2, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) align 2 dereferenceable(64) %alloca, i8 0, i64 64, i1 false)
; GFX10-LABEL: store_load_vindex_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s32
; GFX10-NEXT: v_mov_b32_e32 v2, 15
; GFX11-LABEL: store_load_vindex_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, s32
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX10-PAL-LABEL: store_load_vindex_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s32
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15
; GFX11-PAL-LABEL: store_load_vindex_foo:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s32
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX10-LABEL: private_ptr_foo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0x41200000
; GFX10-NEXT: scratch_store_dword v0, v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: private_ptr_foo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0x41200000 :: v_dual_add_nc_u32 v0, 4, v0
; GFX11-NEXT: scratch_store_b32 v0, v1, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-PAL-LABEL: private_ptr_foo:
; GFX10-PAL-LABEL: private_ptr_foo:
; GFX10-PAL: ; %bb.0:
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, 4, v0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 0x41200000
; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-PAL-LABEL: private_ptr_foo:
; GFX11-PAL: ; %bb.0:
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 0x41200000 :: v_dual_add_nc_u32 v0, 4, v0
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1
store float 1.000000e+01, ptr addrspace(5) %gep, align 4
; GFX10-LABEL: zero_init_small_offset_foo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_mov_b32 s0, 0
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zero_init_small_offset_foo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s32 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:272
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:288
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-PAL-LABEL: zero_init_small_offset_foo:
; GFX10-PAL-LABEL: zero_init_small_offset_foo:
; GFX10-PAL: ; %bb.0:
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_mov_b32 s0, 0
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288
; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-PAL-LABEL: zero_init_small_offset_foo:
; GFX11-PAL: ; %bb.0:
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s32 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_mov_b32 s0, 0
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:272
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:288
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
%padding = alloca [64 x i32], align 4, addrspace(5)
%alloca = alloca [32 x i16], align 2, addrspace(5)
; GFX10-LABEL: store_load_vindex_small_offset_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: s_add_i32 s0, s32, 0x100
; GFX10-NEXT: v_mov_b32_e32 v2, 15
; GFX11-LABEL: store_load_vindex_small_offset_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
; GFX11-NEXT: s_add_i32 s0, s32, 0x100
; GFX11-NEXT: scratch_load_b32 v3, off, s32 glc dlc
; GFX10-PAL-LABEL: store_load_vindex_small_offset_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x100
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15
; GFX11-PAL-LABEL: store_load_vindex_small_offset_foo:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x100
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 glc dlc
; GFX10-LABEL: zero_init_large_offset_foo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_mov_b32 s0, 0
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32
; GFX10-NEXT: s_add_i32 s0, s32, 0x4004
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zero_init_large_offset_foo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32
; GFX11-NEXT: s_add_i32 s0, s32, 0x4004
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-PAL-LABEL: zero_init_large_offset_foo:
; GFX1010-PAL-LABEL: zero_init_large_offset_foo:
; GFX1010-PAL: ; %bb.0:
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-PAL-NEXT: s_mov_b32 s0, 0
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-PAL-NEXT: s_add_i32 s0, s32, 0x4004
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48
-; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-PAL-LABEL: zero_init_large_offset_foo:
; GFX1030-PAL: ; %bb.0:
; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc
; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX1030-PAL-NEXT: s_mov_b32 s0, 0
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32
; GFX1030-PAL-NEXT: s_add_i32 s0, s32, 0x4004
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48
-; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-PAL-LABEL: zero_init_large_offset_foo:
; GFX11-PAL: ; %bb.0:
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_mov_b32 s0, 0
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32
; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
%padding = alloca [4096 x i32], align 4, addrspace(5)
%alloca = alloca [32 x i16], align 2, addrspace(5)
; GFX10-LABEL: store_load_vindex_large_offset_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: s_add_i32 s0, s32, 0x4004
; GFX10-NEXT: v_mov_b32_e32 v2, 15
; GFX11-LABEL: store_load_vindex_large_offset_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
; GFX11-NEXT: s_add_i32 s0, s32, 0x4004
; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc
; GFX10-PAL-LABEL: store_load_vindex_large_offset_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15
; GFX11-PAL-LABEL: store_load_vindex_large_offset_foo:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc
; GFX10-LABEL: store_load_large_imm_offset_foo:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3800
; GFX11-LABEL: store_load_large_imm_offset_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
; GFX11-NEXT: s_movk_i32 s0, 0x3000
; GFX11-NEXT: s_add_i32 s1, s32, 4
; GFX10-PAL-LABEL: store_load_large_imm_offset_foo:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
; GFX11-PAL-LABEL: store_load_large_imm_offset_foo:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
; GFX11-PAL-NEXT: s_movk_i32 s0, 0x3000
; GFX11-PAL-NEXT: s_add_i32 s1, s32, 4
; GFX10-LABEL: store_load_i64_aligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off
; GFX11-LABEL: store_load_i64_aligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-LABEL: store_load_i64_aligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-PAL-NEXT: scratch_store_dwordx2 v0, v[1:2], off
; GFX11-PAL-LABEL: store_load_i64_aligned:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0
; GFX11-PAL-NEXT: scratch_store_b64 v0, v[1:2], off dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-LABEL: store_load_i64_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off
; GFX11-LABEL: store_load_i64_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-LABEL: store_load_i64_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-PAL-NEXT: scratch_store_dwordx2 v0, v[1:2], off
; GFX11-PAL-LABEL: store_load_i64_unaligned:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0
; GFX11-PAL-NEXT: scratch_store_b64 v0, v[1:2], off dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-LABEL: store_load_v3i32_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 1
; GFX10-NEXT: v_mov_b32_e32 v2, 2
; GFX10-NEXT: v_mov_b32_e32 v3, 3
; GFX11-LABEL: store_load_v3i32_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
; GFX11-NEXT: v_mov_b32_e32 v3, 3
; GFX11-NEXT: scratch_store_b96 v0, v[1:3], off dlc
; GFX10-PAL-LABEL: store_load_v3i32_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 2
; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 3
; GFX11-PAL-LABEL: store_load_v3i32_unaligned:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
; GFX11-PAL-NEXT: v_mov_b32_e32 v3, 3
; GFX11-PAL-NEXT: scratch_store_b96 v0, v[1:3], off dlc
; GFX10-LABEL: store_load_v4i32_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 1
; GFX10-NEXT: v_mov_b32_e32 v2, 2
; GFX10-NEXT: v_mov_b32_e32 v3, 3
; GFX11-LABEL: store_load_v4i32_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
; GFX11-NEXT: v_dual_mov_b32 v3, 3 :: v_dual_mov_b32 v4, 4
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off dlc
; GFX10-PAL-LABEL: store_load_v4i32_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 2
; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 3
; GFX11-PAL-LABEL: store_load_v4i32_unaligned:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
; GFX11-PAL-NEXT: v_dual_mov_b32 v3, 3 :: v_dual_mov_b32 v4, 4
; GFX11-PAL-NEXT: scratch_store_b128 v0, v[1:4], off dlc
; GFX10-LABEL: store_load_i32_negative_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v0, -1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 1
; GFX10-NEXT: scratch_store_byte v0, v1, off
; GFX11-LABEL: store_load_i32_negative_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, -1, v0
; GFX11-NEXT: scratch_store_b8 v0, v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-LABEL: store_load_i32_negative_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, -1, v0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1
; GFX10-PAL-NEXT: scratch_store_byte v0, v1, off
; GFX11-PAL-LABEL: store_load_i32_negative_unaligned:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, -1, v0
; GFX11-PAL-NEXT: scratch_store_b8 v0, v1, off dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-LABEL: store_load_i32_large_negative_unaligned:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xffffef7f, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 1
; GFX10-NEXT: scratch_store_byte v0, v1, off
; GFX11-LABEL: store_load_i32_large_negative_unaligned:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, 0xffffef7f, v0
; GFX11-NEXT: scratch_store_b8 v0, v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-LABEL: store_load_i32_large_negative_unaligned:
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, 0xffffef7f, v0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 1
; GFX10-PAL-NEXT: scratch_store_byte v0, v1, off
; GFX11-PAL-LABEL: store_load_i32_large_negative_unaligned:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_add_nc_u32 v0, 0xffffef7f, v0
; GFX11-PAL-NEXT: scratch_store_b8 v0, v1, off dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-LABEL: test_fma:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.fma.f16(half %x, half %y, half %z)
; GFX10-LABEL: test_fmac:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fmac_f16_e32 v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.fma.f16(half %y, half %z, half %x)
; GFX10-LABEL: test_fmaak:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200)
; GFX10-LABEL: test_fmamk:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.fma.f16(half %x, half 0xH4200, half %z)
; GFX10-SDAG-LABEL: test_D139469_f16:
; GFX10-SDAG: ; %bb.0: ; %bb
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 0x211e
; GFX10-SDAG-NEXT: v_mul_f16_e32 v2, 0x291e, v0
; GFX10-SDAG-NEXT: v_fmac_f16_e32 v1, 0x291e, v0
; GFX10-GISEL-LABEL: test_D139469_f16:
; GFX10-GISEL: ; %bb.0: ; %bb
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x291e
; GFX10-GISEL-NEXT: v_mul_f16_e32 v1, 0x291e, v0
; GFX10-GISEL-NEXT: v_fmaak_f16 v0, s4, v0, 0x211e
; GFX10-SDAG-LABEL: test_D139469_v2f16:
; GFX10-SDAG: ; %bb.0: ; %bb
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x211e
; GFX10-SDAG-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1]
; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s4 op_sel_hi:[0,1,0]
; GFX10-GISEL-LABEL: test_D139469_v2f16:
; GFX10-GISEL: ; %bb.0: ; %bb
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b32 s4, 0x291e291e
; GFX10-GISEL-NEXT: v_pk_mul_f16 v1, v0, 0x291e op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v0, s4, 0x211e op_sel_hi:[1,1,0]
; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
; FMAGFX10: ; %bb.0:
; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0
; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
;
; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
; FMAGFX11: ; %bb.0:
; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX11-NEXT: v_fmac_f32_e32 v0, v1, v0
; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
;
; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
; FMADGFX10: ; %bb.0:
; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v1, v0
; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
%add = fadd fast float %arg1, 1.0
; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
; FMAGFX10: ; %bb.0:
; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0
; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
;
; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
; FMAGFX11: ; %bb.0:
; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX11-NEXT: v_fma_f32 v0, -v1, v0, v0
; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
;
; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
; FMADGFX10: ; %bb.0:
; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMADGFX10-NEXT: v_fma_f32 v0, -v1, v0, v0
; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
%add = fsub fast float 1.0, %arg1
; FMAGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
; FMAGFX10: ; %bb.0:
; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0
; FMAGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1
; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
; FMAGFX11-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
; FMAGFX11: ; %bb.0:
; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX11-NEXT: v_dual_fmac_f32 v0, v2, v0 :: v_dual_fmac_f32 v1, v3, v1
; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
;
; FMADGFX10-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
; FMADGFX10: ; %bb.0:
; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMADGFX10-NEXT: v_fmac_f32_e32 v0, v2, v0
; FMADGFX10-NEXT: v_fmac_f32_e32 v1, v3, v1
; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
; FMAGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
; FMAGFX10: ; %bb.0:
; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0
; FMAGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1
; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
; FMAGFX11-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
; FMAGFX11: ; %bb.0:
; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX11-NEXT: v_fma_f32 v0, -v2, v0, v0
; FMAGFX11-NEXT: v_fma_f32 v1, -v3, v1, v1
; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
; FMADGFX10-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
; FMADGFX10: ; %bb.0:
; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMADGFX10-NEXT: v_fma_f32 v0, -v2, v0, v0
; FMADGFX10-NEXT: v_fma_f32 v1, -v3, v1, v1
; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
; FMAGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
; FMAGFX10: ; %bb.0:
; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX10-NEXT: v_fma_f32 v0, v0, v1, v1
; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
;
; FMAGFX11-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
; FMAGFX11: ; %bb.0:
; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX11-NEXT: v_fma_f32 v0, v0, v1, v1
; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
;
; FMADGFX10-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
; FMADGFX10: ; %bb.0:
; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMADGFX10-NEXT: v_mad_f32 v0, v0, v1, v1
; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
%add = fadd fast float %arg0, 1.0
; FMAGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
; FMAGFX10: ; %bb.0:
; FMAGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX10-NEXT: v_fma_f32 v0, -v0, v1, v1
; FMAGFX10-NEXT: s_setpc_b64 s[30:31]
;
; FMAGFX11-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
; FMAGFX11: ; %bb.0:
; FMAGFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMAGFX11-NEXT: s_waitcnt_vscnt null, 0x0
; FMAGFX11-NEXT: v_fma_f32 v0, -v0, v1, v1
; FMAGFX11-NEXT: s_setpc_b64 s[30:31]
;
; FMADGFX10-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
; FMADGFX10: ; %bb.0:
; FMADGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FMADGFX10-NEXT: s_waitcnt_vscnt null, 0x0
; FMADGFX10-NEXT: v_mad_f32 v0, -v0, v1, v1
; FMADGFX10-NEXT: s_setpc_b64 s[30:31]
%sub = fsub fast float 1.0, %arg0
; GFX11-LABEL: no_fmax3_v2f16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, v2, v0
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0, v1
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ugt half %a, %b
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ugt <2 x half> %a, %b
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SAFE-NEXT: v_cmp_nle_f16_e32 vcc_lo, v0, v2
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX11-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX11-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v7
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v3
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_max_f16 v0, v0, v4
; GFX11-NNAN-NEXT: v_pk_max_f16 v1, v1, v5
; GFX11-NNAN-NEXT: v_pk_max_f16 v2, v2, v6
; GFX11-LABEL: no_fmin3_v2f16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_pk_min_f16 v0, v2, v0
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule half %a, %b
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v2f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule <2 x half> %a, %b
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NNAN-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v7
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v3
; GFX11-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX11-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
; GFX11-NNAN: ; %bb.0:
; GFX11-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NNAN-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NNAN-NEXT: v_pk_min_f16 v0, v0, v4
; GFX11-NNAN-NEXT: v_pk_min_f16 v1, v1, v5
; GFX11-NNAN-NEXT: v_pk_min_f16 v2, v2, v6
; GFX11-SAFE-LABEL: v_fneg_add_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_add_f16_e32 v0, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_add_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e64 v0, -v0, v1
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%add = fadd half %a, %b
; GFX11-LABEL: v_fneg_add_store_use_add_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_f16_e32 v1, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; GFX11-SAFE-LABEL: v_fneg_add_multi_use_add_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_add_f16_e32 v1, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; GFX11-NSZ-LABEL: v_fneg_add_multi_use_add_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e64 v0, -v0, v1
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
; GFX11-SAFE-LABEL: v_fneg_add_fneg_x_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_sub_f16_e32 v0, v1, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_add_fneg_x_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
; GFX11-SAFE-LABEL: v_fneg_add_x_fneg_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_add_x_fneg_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e32 v0, v1, v0
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.b = fneg half %b
; GFX11-SAFE-LABEL: v_fneg_add_fneg_fneg_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_sub_f16_e64 v0, -v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_add_fneg_fneg_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_add_f16_e32 v0, v0, v1
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
; GFX11-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_sub_f16_e32 v1, v1, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1
; GFX11-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e32 v2, v0, v1
; GFX11-NSZ-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_sub_f16_e32 v1, v1, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v3, 0x8000, v1
; GFX11-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e32 v3, v0, v1
; GFX11-NSZ-NEXT: v_mul_f16_e64 v1, -v0, v2
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: v_fneg_mul_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %a, %b
; GFX11-LABEL: v_fneg_mul_store_use_mul_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v1, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; GFX11-LABEL: v_fneg_mul_multi_use_mul_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v0
; GFX11-LABEL: v_fneg_mul_fneg_x_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
; GFX11-LABEL: v_fneg_mul_x_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fneg.b = fneg half %b
; GFX11-LABEL: v_fneg_mul_fneg_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
; GFX11-LABEL: v_fneg_mul_store_use_fneg_x_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v2, v0, v1
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v0, v1
; GFX11-NEXT: v_mul_f16_e64 v1, -v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: v_fneg_minnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: v_fneg_minnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %b)
; GFX11-LABEL: v_fneg_self_minnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %a)
; GFX11-LABEL: v_fneg_self_minnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %a)
; GFX11-LABEL: v_fneg_posk_minnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f16_e32 v0, -4.0, v0
; GFX11-LABEL: v_fneg_posk_minnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -4.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 4.0, half %a)
; GFX11-LABEL: v_fneg_negk_minnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f16_e32 v0, 4.0, v0
; GFX11-LABEL: v_fneg_negk_minnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, 4.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half -4.0, half %a)
; GFX11-LABEL: v_fneg_0_minnum_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-LABEL: v_fneg_neg0_minnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
; GFX11-LABEL: v_fneg_inv2pi_minnum_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; GFX11-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; GFX11-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half -0.0, half %a)
; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
; GFX11-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e32 v1, 4.0, v0
; GFX11-LABEL: v_fneg_maxnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: v_fneg_maxnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_f16_e64 v0, -v0, -v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %b)
; GFX11-LABEL: v_fneg_self_maxnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %a)
; GFX11-LABEL: v_fneg_self_maxnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %a)
; GFX11-LABEL: v_fneg_posk_maxnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, -4.0, v0
; GFX11-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_f16_e64 v0, -v0, -4.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half 4.0, half %a)
; GFX11-LABEL: v_fneg_negk_maxnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 4.0, v0
; GFX11-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_f16_e64 v0, -v0, 4.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half -4.0, half %a)
; GFX11-LABEL: v_fneg_0_maxnum_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-LABEL: v_fneg_neg0_maxnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0, v0
; GFX11-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_f16_e64 v0, -v0, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half -0.0, half %a)
; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e32 v1, 4.0, v0
; GFX11-SAFE-LABEL: v_fneg_fma_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
; GFX11-NSZ-LABEL: v_fneg_fma_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %a, half %b, half %c)
; GFX11-LABEL: v_fneg_fma_store_use_fma_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v1, v0, v1, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fma_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fma_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_y_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_y_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
; GFX11-SAFE-LABEL: v_fneg_fma_x_fneg_y_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_fma_x_fneg_y_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.b = fneg half %b
; GFX11-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
; GFX11-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fma_f16 v0, -v0, v1, -v2
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
; GFX11-SAFE-LABEL: v_fneg_fma_x_y_fneg_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NSZ-LABEL: v_fneg_fma_x_y_fneg_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fneg.c = fneg half %c
; GFX11-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fma_f16 v1, -v0, v1, v2
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1
; GFX11-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v2, v0, v1, -v2
; GFX11-NSZ-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fma_f16 v1, -v0, v1, v2
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v1
; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v2, v0, v1, -v2
; GFX11-NSZ-NEXT: v_mul_f16_e64 v1, -v0, v3
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-LABEL: v_fneg_fmad_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
; GFX11-NSZ-LABEL: v_fneg_fmad_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
; GFX11-SAFE-LABEL: v_fneg_fmad_v4f32:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX11-SAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NSZ-LABEL: v_fneg_fmad_v4f32:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX11-NSZ-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v0, 0x8000, v2
; GFX11-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_mul_f16_e32 v1, -4.0, v0
; GFX11-LABEL: v_fneg_fp_extend_f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v2
; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v0
; GFX11-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
; GFX11-LABEL: v_fneg_fp_round_f64_to_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0
; GFX11-LABEL: v_fneg_fp_round_fneg_f64_to_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v1
; GFX11-NEXT: v_bfe_u32 v3, v1, 20, 11
; GFX11-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_or_b32 v2, 0x1ff, v1, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX11-NEXT: v_bfe_u32 v4, v1, 20, 11
; GFX11-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_or_b32 v4, 0x1ff, v1, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX11-NEXT: v_bfe_u32 v6, v1, 20, 11
; GFX11-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_or_b32 v0, 0x1ff, v1, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v1
; GFX11-NEXT: v_bfe_u32 v3, v1, 20, 11
; GFX11-LABEL: v_fneg_trunc_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_trunc_f16_e64 v0, -v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%trunc = call half @llvm.trunc.f16(half %a)
; GFX11-SAFE-LABEL: v_fneg_round_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_trunc_f16_e32 v1, v0
; GFX11-SAFE-NEXT: s_movk_i32 s0, 0x3c00
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NSZ-LABEL: v_fneg_round_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_trunc_f16_e32 v1, v0
; GFX11-NSZ-NEXT: s_movk_i32 s0, 0x3c00
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-LABEL: v_fneg_rint_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rndne_f16_e64 v0, -v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%rint = call half @llvm.rint.f16(half %a)
; GFX11-LABEL: v_fneg_nearbyint_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_rndne_f16_e64 v0, -v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%nearbyint = call half @llvm.nearbyint.f16(half %a)
; GFX11-LABEL: v_fneg_sin_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, 0xb118, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sin_f16_e32 v0, v0
; GFX11-LABEL: v_fneg_canonicalize_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%trunc = call half @llvm.canonicalize.f16(half %a)
; GFX11-LABEL: v_fneg_copytoreg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v6, 0x3ff, v31
; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT: s_mov_b32 s0, exec_lo
; GFX11-LABEL: v_fneg_inlineasm_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v0
; GFX11-LABEL: v_fneg_inlineasm_multi_use_src_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GFX11-LABEL: multiuse_fneg_2_vop3_users_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v3, -v0, v1, v2
; GFX11-NEXT: v_fma_f16 v1, -v0, v2, 2.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: multiuse_fneg_2_vop2_users_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v3, -v0, v1
; GFX11-NEXT: v_mul_f16_e64 v1, -v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: multiuse_fneg_vop2_vop3_users_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, -v2, v3, 2.0
; GFX11-NEXT: v_mul_f16_e64 v1, -v2, v4
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-LABEL: free_fold_src_code_size_cost_use_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fma_f16 v1, v2, v3, 2.0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_mul_f16_e64 v0, -v1, v4
; GFX11-NSZ-LABEL: free_fold_src_code_size_cost_use_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v1, v2, -v3, -2.0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, v1, v4
; GFX11-LABEL: one_use_cost_to_fold_into_src_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_trunc_f16_e32 v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_fma_f16 v0, -v0, v3, v4
; GFX11-LABEL: multi_use_cost_to_fold_into_src:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_trunc_f16_e32 v1, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_fma_f16 v0, -v1, v3, v4
; GFX11-LABEL: fneg_fma_fneg_dagcombine_loop:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v1, v1, v2, 0x8000 op_sel_hi:[1,1,0] neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_pk_add_f16 v0, v1, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX11-LABEL: nnan_fmul_neg1_to_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %x, -1.0
; GFX11-LABEL: denormal_fmul_neg1_to_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nnan half %x, -1.0
; GFX11-LABEL: denorm_snan_fmul_neg1_to_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-LABEL: flush_snan_fmul_neg1_to_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-LABEL: fadd_select_fneg_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: fadd_select_fneg_fneg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX10-LABEL: testfn:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], -v[0:1]
; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3]
; GFX10-NEXT: v_add_f64 v[2:3], -v[2:3], -v[4:5]
; GFX11-LABEL: fneg_xor_select_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-LABEL: fneg_xor_select_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-LABEL: fneg_xor_select_i32_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
; GFX11-NEXT: v_xor_b32_e32 v0, 0x80000000, v1
; GFX11-NEXT: global_store_b32 v[3:4], v1, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%select = select i1 %cond, i32 %arg0, i32 %arg1
store i32 %select, ptr addrspace(1) %ptr
; GFX11-LABEL: fneg_xor_select_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-LABEL: fneg_xor_select_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-LABEL: fneg_xor_select_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-LABEL: fneg_xor_select_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX11-LABEL: fneg_xor_select_i16_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff8000, v1
; GFX11-NEXT: global_store_b16 v[3:4], v1, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%select = select i1 %cond, i16 %arg0, i16 %arg1
store i16 %select, ptr addrspace(1) %ptr
; GFX11-LABEL: fneg_xor_select_i64_multi_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc_lo
; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%select = select i1 %cond, i64 %arg0, i64 %arg1
store i64 %select, ptr addrspace(1) %ptr
; GFX11-LABEL: select_fneg_xor_select_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-LABEL: select_fneg_select_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-LABEL: fneg_xor_select_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-LABEL: fneg_xor_select_f64_multi_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%select = select i1 %cond, double %arg0, double %arg1
store double %select, ptr addrspace(1) %ptr
; GFX11-LABEL: fneg_xor_select_i64_user_with_srcmods:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-LABEL: select_fneg_select_fneg_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-LABEL: select_fneg_xor_select_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-LABEL: select_fneg_select_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-LABEL: select_fneg_xor_select_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-LABEL: select_fneg_select_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX11-NEXT: v_and_b32_e32 v3, 1, v3
; GFX11-LABEL: select_fneg_xor_select_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX11-NEXT: v_and_b32_e32 v3, 1, v3
; GFX11-LABEL: cospiD_pattern0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v5, 1, v0
; GFX11-NEXT: v_cmp_lt_i32_e64 s0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-LABEL: cospiD_pattern1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v5, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
; GFX11-LABEL: cospiD_pattern0_half:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v3, 1, v0
; GFX11-NEXT: v_cmp_lt_i16_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-LABEL: cospiD_pattern1_half:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v3, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v3
; GFX11-LABEL: fneg_f64_bitcast_vector_i64_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast i64 %arg to double
; GFX11-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast <2 x i32> %arg to double
; GFX11-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast <2 x float> %arg to double
; GFX11-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast <4 x i16> %arg to double
; GFX11-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast <4 x half> %arg to double
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_perm_b32 v2, v2, v3, 0x3020706
; GFX11-NEXT: v_perm_b32 v0, v0, v1, 0x3020706
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f64 v[6:7], -v[0:1], v[2:3]
; GFX11-NEXT: v_mul_f64 v[2:3], v[4:5], -v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_f32_e32 v1, -2.0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fadd = fadd nsz nnan float %elt1, 2.0
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, v4
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
%insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
; GFX11-NEXT: v_dual_add_f32 v2, v0, v2 :: v_dual_add_f32 v3, v1, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; FUNC-LABEL: {{^}}v_fneg_i32:
; GCN: s_waitcnt
-; GFX11: s_waitcnt_vscnt
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64
define i32 @v_fneg_i32(i32 %in) {
; FUNC-LABEL: {{^}}v_fneg_i32_fp_use:
; GCN: s_waitcnt
-; GFX11: s_waitcnt_vscnt
; GCN-NEXT: v_sub_f32_e32 v0, 2.0, v0
; GCN-NEXT: s_setpc_b64
define float @v_fneg_i32_fp_use(i32 %in) {
; FUNC-LABEL: {{^}}v_fneg_i64:
; GCN: s_waitcnt
-; GFX11: s_waitcnt_vscnt
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64
define i64 @v_fneg_i64(i64 %in) {
; FUNC-LABEL: {{^}}v_fneg_i64_fp_use:
; GCN: s_waitcnt
-; GFX11: s_waitcnt_vscnt
; GCN-NEXT: v_add_f64 v[0:1], -v[0:1], 2.0
; GCN-NEXT: s_setpc_b64
define double @v_fneg_i64_fp_use(i64 %in) {
; FUNC-LABEL: {{^}}v_fneg_i16:
; GCN: s_waitcnt
-; GFX11: s_waitcnt_vscnt
; GCN-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
; GCN-NEXT: s_setpc_b64
define i16 @v_fneg_i16(i16 %in) {
; GFX10-LABEL: fold_abs_in_branch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-NEXT: s_mov_b32 s4, exec_lo
; GFX10-NEXT: v_add_f32_e32 v1, v0, v1
; GFX10-LABEL: fold_abs_in_branch_multiple_users:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-NEXT: s_mov_b32 s4, exec_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-LABEL: fold_abs_in_branch_undef:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e64 v0, |s4|, |s4|
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v0
; GFX10-NEXT: s_cbranch_vccnz .LBB2_2
; GFX10-LABEL: fold_abs_in_branch_poison:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e64 v0, |s4|, |s4|
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v0
; GFX10-NEXT: s_cbranch_vccnz .LBB3_2
; GFX10-LABEL: fold_abs_in_branch_fabs:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-NEXT: s_mov_b32 s4, exec_lo
; GFX10-NEXT: v_add_f32_e32 v1, v0, v1
; GFX10-LABEL: fold_abs_in_branch_phi:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-NEXT: s_mov_b32 s4, exec_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-LABEL: fold_neg_in_branch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-NEXT: s_mov_b32 s4, exec_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[3:4], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: flat_atomic_fmin_f32_rtn:
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: flat_store_b32 v[3:4], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr %ptr, float %data)
store float %ret, ptr %out
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[3:4], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: flat_atomic_fmax_f32_rtn:
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: flat_store_b32 v[3:4], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr %ptr, float %data)
store float %ret, ptr %out
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: fadd_fpext_fmul_f32_to_f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-LABEL: fadd_fmad_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-LABEL: fadd_fpext_fmuladd_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2
; GFX11-LABEL: fadd_fpext_fma_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2
; GFX11-LABEL: fadd_fpext_fma_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2
; GFX11-LABEL: fsub_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
; GFX11-F32FLUSH: ; %bb.0: ; %entry
; GFX11-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-F32FLUSH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-F32FLUSH-NEXT: v_fma_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX11-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-F32DENORM-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
; GFX11-F32DENORM: ; %bb.0: ; %entry
; GFX11-F32DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-F32DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-F32DENORM-NEXT: v_mul_f16_e32 v1, v1, v2
; GFX11-F32DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-F32DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-LABEL: fsub_fpext_fneg_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: fsub_fneg_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1]
; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f16_e32 v3, v0, v1
; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1]
; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f16_e32 v3, v1, v2
; GFX10-LABEL: v_pow_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT: v_exp_f32_e32 v0, v0
; GFX11-LABEL: v_pow_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX10-LABEL: v_pow_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_log_f32_e32 v1, v1
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
; GFX11-LABEL: v_pow_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: v_log_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX10-LABEL: v_pow_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX11-LABEL: v_pow_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_pow_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX11-LABEL: v_pow_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX11-LABEL: v_pow_v2f16_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX11-LABEL: v_pow_v2f16_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f32_e64 v0, |v0|
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT: v_exp_f32_e32 v0, v0
; GFX11-LABEL: v_pow_f32_fabs_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f32_e64 v0, |v0|
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT: v_exp_f32_e32 v0, v0
; GFX11-LABEL: v_pow_f32_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f32_e64 v0, |v0|
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT: v_exp_f32_e32 v0, v0
; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f32_e64 v0, |v0|
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0
; GFX11-LABEL: safe_math_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX11-NEXT: v_floor_f32_e32 v4, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
; GFX11-LABEL: safe_math_fract_f32_noinf_check:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
; GFX11-LABEL: no_nan_check_math_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
; GFX11-LABEL: basic_fract_f32_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: basic_fract_f32_flags_minnum:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-LABEL: basic_fract_f32_flags_fsub:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: basic_fract_v2f32_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_fract_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
; GFX11-LABEL: nnan_minnum_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: nnan_fsub_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-LABEL: nnan_floor_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-LABEL: nnan_src_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: not_fract_f32_wrong_const:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-LABEL: not_fract_f32_swapped_fsub:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v1, v0
; GFX11-LABEL: not_fract_f32_not_floor:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_trunc_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-LABEL: not_fract_f32_different_floor:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-LABEL: not_fract_f32_maxnum:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; IR-NEXT: ret float [[COND]]
;
-; GFX6-LABEL: fcmp_uno_check_is_nan_f32:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: fcmp_uno_check_is_nan_f32:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: fcmp_uno_check_is_nan_f32:
-; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: fcmp_uno_check_is_nan_f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: fcmp_uno_check_is_nan_f32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
; GFX11-LABEL: select_nan_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: commuted_select_nan_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: wrong_commuted_nan_select_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: basic_fract_f16_nonan:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: basic_fract_v2f16_nonan:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: basic_fract_f64_nanans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: safe_math_fract_f16_noinf_check:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f16_e32 v3, v0
; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: global_store_b16 v[1:2], v3, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call half @llvm.floor.f16(half %x)
; GFX11-LABEL: safe_math_fract_f64_noinf_check:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call double @llvm.floor.f64(double %x)
; GFX11-LABEL: select_nan_fract_f32_flags_select:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: select_nan_fract_f32_flags_minnum:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX11-LABEL: safe_math_fract_v2f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f32_e32 v6, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX11-NEXT: v_fract_f32_e32 v7, v1
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1|
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
; GFX11-LABEL: safe_math_fract_f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: s_mov_b32 s1, 0x7ff00000
; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call double @llvm.floor.f64(double %x)
; GFX11-LABEL: safe_math_fract_f16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f16_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
; GFX11-NEXT: v_floor_f16_e32 v4, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
; GFX11-NEXT: global_store_b16 v[1:2], v4, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call half @llvm.floor.f16(half %x)
; GFX11-LABEL: safe_math_fract_v2f16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-NEXT: v_fract_f16_e32 v6, v0
; GFX11-NEXT: v_floor_f16_e32 v5, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v3
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
; GFX11-LABEL: safe_math_fract_v2f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
; GFX11-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
; GFX11-NEXT: v_cndmask_b32_e64 v2, v12, 0, s1
; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
; GFX10-LABEL: v_fshr_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshr_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ret = call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2)
; GFX10-LABEL: v_fshr_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_fshr_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fshr_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7
; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8
; GFX11-LABEL: v_fshr_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7
; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8
; GFX10-LABEL: v_fshr_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9
; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10
; GFX11-LABEL: v_fshr_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9
; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10
; GFX10-LABEL: v_fshr_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1
; GFX11-LABEL: v_fshr_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
; GFX10-LABEL: v_fshr_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2
; GFX11-LABEL: v_fshr_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2
; GFX10-LABEL: v_fshr_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX11-LABEL: v_fshr_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX10-LABEL: v_fshr_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX11-LABEL: v_fshr_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX10-LABEL: v_fshr_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX10-NEXT: v_not_b32_e32 v5, v4
; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
; GFX11-LABEL: v_fshr_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: v_not_b32_e32 v5, v4
; GFX11-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
; GFX10-LABEL: v_fshr_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_not_b32_e32 v9, v8
; GFX11-LABEL: v_fshr_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_not_b32_e32 v9, v8
; GFX10-LABEL: v_fshr_i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaab, v3
; GFX11-LABEL: v_fshr_i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-LABEL: v_fshr_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v4
; GFX10-NEXT: v_and_b32_e32 v7, 0xffffff, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX11-LABEL: v_fshr_v2i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v4
; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v5
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX11-LABEL: void_func_i1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i1 %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_i1_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = zext i1 %arg0 to i32
%add = add i32 %ext, 12
; GFX11-LABEL: void_func_i1_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = sext i1 %arg0 to i32
%add = add i32 %ext, 12
; GFX11-LABEL: i1_arg_i1_use:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB3_2: ; %bb2
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
bb:
br i1 %arg, label %bb2, label %bb1
; GFX11-LABEL: void_func_i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i8 %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_i8_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = zext i8 %arg0 to i32
%add = add i32 %ext, 12
; GFX11-LABEL: void_func_i8_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = sext i8 %arg0 to i32
%add = add i32 %ext, 12
; GFX11-LABEL: void_func_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i16 %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_i16_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = zext i16 %arg0 to i32
%add = add i32 %ext, 12
; GFX11-LABEL: void_func_i16_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = sext i16 %arg0 to i32
%add = add i32 %ext, 12
; GFX11-LABEL: void_func_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i64 %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store half %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store float %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store double %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v5i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v8i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v32i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <32 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v33i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b32 v32, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <33 x i32> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i64> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v3i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i64> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v4i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i64> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v5i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b64 v[8:9], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i64> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v8i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i64> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v16i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i64> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i16> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i16> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i16> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v5i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i16> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v8i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i16> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v16i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i16> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v2i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%elt0 = extractelement <2 x i24> %arg0, i32 0
%elt1 = extractelement <2 x i24> %arg0, i32 1
; GFX11-LABEL: void_func_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x float> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x float> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x float> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x float> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x float> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x double> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x double> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x double> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v8f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x double> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v16f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x double> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x half> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x half> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x half> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v8f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x half> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_v16f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x half> %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_i32_i64_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-LABEL: void_func_struct_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store { i32 } %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store { i8, i32 } %arg0, ptr addrspace(1) undef
ret void
; GFX11-LABEL: void_func_byval_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4
; GFX11-NEXT: scratch_load_u8 v1, off, s32
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0
store { i8, i32 } %arg0.load, ptr addrspace(1) undef
; GFX11-LABEL: void_func_byval_struct_i8_i32_x2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_u8 v1, off, s32 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc
; GFX11-LABEL: void_func_byval_i32_byval_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v2, off, s32
; GFX11-NEXT: scratch_load_b64 v[0:1], off, s32 offset:8
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%arg0.load = load i32, ptr addrspace(5) %arg0
%arg1.load = load i64, ptr addrspace(5) %arg1
; GFX11-LABEL: void_func_v32i32_i32_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12
; GFX11-LABEL: void_func_v32i32_i1_i8_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX11-LABEL: void_func_v32i32_v2i16_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-LABEL: void_func_v32i32_v2i64_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
; GFX11-LABEL: void_func_v32i32_v4i32_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x10
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44
; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x20
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76
; GFX11-LABEL: void_func_v3f32_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-LABEL: void_func_v3i32_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-LABEL: void_func_v16i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc
; GFX11-LABEL: void_func_v32i32_v16i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x10
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64
; GFX11-LABEL: i1_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-LABEL: i1_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-LABEL: i1_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-LABEL: i8_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-LABEL: i8_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-LABEL: i8_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0
; GFX11-LABEL: i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-LABEL: i16_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-LABEL: i16_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0
; GFX11-LABEL: i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-LABEL: i48_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-LABEL: i48_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-LABEL: i48_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-LABEL: i63_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret i63 %val
}
; GFX11-LABEL: i63_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret i63 %val
; GFX11-LABEL: i63_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX11-LABEL: i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-LABEL: i65_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-LABEL: f32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-LABEL: f64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-LABEL: v2f64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-LABEL: v2i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-LABEL: v3i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b96 v[0:2], off, s[0:3], 0
; GFX11-LABEL: v4i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-LABEL: v5i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v4, off, s[0:3], 0 glc dlc
; GFX11-LABEL: v8i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v16i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v32i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v2i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-LABEL: v3i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v4i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v5i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v8i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v16i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v2i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-LABEL: v3i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-LABEL: v4i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-LABEL: v4f16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-LABEL: v5i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v8i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v16i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v16i8_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: v4i8_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-LABEL: struct_i8_i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-LABEL: void_func_sret_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v1, off, s[0:3], 0 glc dlc
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b8 v0, v1, off
; GFX11-NEXT: scratch_store_b32 v0, v2, off offset:4
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load volatile i8, ptr addrspace(1) undef
%val1 = load volatile i32, ptr addrspace(1) undef
; GFX11-LABEL: v33i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_store_b32 off, v33, s8
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <33 x i32>, ptr addrspace(1) %ptr
; GFX11-LABEL: struct_v32i32_i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_store_b32 off, v33, s8
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr
; GFX11-LABEL: struct_i32_v32i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: scratch_store_b128 off, v[29:32], s8
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_store_b32 off, v33, s0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr
; GFX11-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v1, v0
; GFX11-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v1, v0
; GFX11-LABEL: void_func_sret_max_known_zero_bits:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 17, v0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX10-LABEL: test_call_external_void_func_i1_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i1_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i1_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i1_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i1_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i8_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i8_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i8_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i8_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i8_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i16_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i16_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i16_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_f16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_f16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_f32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v5f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_f64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i32_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v5i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v8i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v8i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v8i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v32i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v32i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v32i32_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_i32_func_i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_i32_func_i32_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v16i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v16i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: tail_call_byval_align16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s4, -1
; GFX11-LABEL: tail_call_byval_align16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s4, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-LABEL: tail_call_byval_align16:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i1_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i8_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i16_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_i64_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i64_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i64_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i64_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i64_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_f16_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_f32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2f32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v5f32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_f64_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2f64_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f64_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i16_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v2i32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v3i32_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v4i32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v5i32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v8i32_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v16i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v16i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v32i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v32i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_v32i32_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: stack_12xv3i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: stack_12xv3i32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: stack_12xv3i32:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: stack_8xv5i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: stack_8xv5i32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: stack_8xv5i32:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: stack_8xv5f32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: stack_8xv5f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-LABEL: stack_8xv5f32:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: void_func_void_clobber_s28_s29:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s28_s29:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_void_func_void_preserves_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_void_func_void_preserves_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_void_func_void_preserves_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_void_func_void_preserves_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_void_func_void_preserves_v40:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_void_func_void_preserves_v40:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s34}"() #0
ret void
; GFX10-LABEL: test_call_void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: test_call_void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: test_call_void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: callee_saved_sgpr_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: callee_saved_sgpr_kernel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX11-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10PLUS-LABEL: return_i1:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: call_i1:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s36, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX11-LABEL: call_i1:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX10PLUS-LABEL: return_i16:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 10
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: call_i16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s36, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX11-LABEL: call_i16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX10PLUS-LABEL: return_2xi16:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: call_2xi16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s36, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX11-LABEL: call_2xi16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX10-LABEL: return_3xi16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT: v_mov_b32_e32 v1, 3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: return_3xi16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
; GFX10-LABEL: call_3xi16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s36, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX11-LABEL: call_3xi16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX10-LABEL: return_100xi32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX11-LABEL: return_100xi32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0
; GFX10-LABEL: call_100xi32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s36, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX11-LABEL: call_100xi32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX10-LABEL: return_512xi32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX10-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_512xi32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s3, s0
; GFX11-NEXT: s_add_i32 s0, s0, 16
; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
ret <512 x i32> zeroinitializer
; GFX10-LABEL: call_512xi32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s36, s33
; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0
; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000
; GFX11-LABEL: call_512xi32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s34, s33
; GFX11-NEXT: s_add_i32 s33, s32, 0x7ff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX10-LABEL: return_72xi32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:176
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:180
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_72xi32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0xe
; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:220
; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:216
; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:216
; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:220
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret <72 x i32> %val
}
; GFX10-LABEL: call_72xi32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s36, s33
; GFX10-NEXT: s_add_i32 s33, s32, 0x3fe0
; GFX10-NEXT: s_and_b32 s33, s33, 0xffffc000
; GFX11-LABEL: call_72xi32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s45, s33
; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX10-LABEL: mul_inline_imm_0.5_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_0.5_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_0.5_i16:
; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_neg_0.5_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_0.5_i16:
; GFX10-LABEL: mul_inline_imm_1.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_1.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_1.0_i16:
; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_neg_1.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_1.0_i16:
; GFX10-LABEL: shl_inline_imm_2.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: shl_inline_imm_2.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: shl_inline_imm_2.0_i16:
; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: shl_inline_imm_neg_2.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: shl_inline_imm_neg_2.0_i16:
; GFX10-LABEL: mul_inline_imm_4.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_4.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_4.0_i16:
; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_neg_4.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_4.0_i16:
; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_inv2pi_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_inv2pi_i16:
; GFX11-LABEL: f0:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s2, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-LABEL: f1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX10-SDAG-LABEL: clpeak_imad_pat_i32:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX10-GISEL-LABEL: clpeak_imad_pat_i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1
; GFX10-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-GISEL-LABEL: clpeak_imad_pat_i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1
; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2
; GFX10-SDAG-LABEL: clpeak_umad_pat_i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-GISEL-LABEL: clpeak_umad_pat_i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1
; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2
; GFX10-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-GISEL-LABEL: clpeak_umad_pat_v3i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2
; GFX10-SDAG-LABEL: clpeak_umad_pat_v4i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-GISEL-LABEL: clpeak_umad_pat_v4i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v4, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v1
; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i32:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v6, 1, v0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v7, 1, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i32:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2
; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v9, 1, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v10, 1, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_i24:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-GISEL-LABEL: clpeak_imad_pat_i24:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX10-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-SDAG-LABEL: clpeak_imad_pat_u24:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-GISEL-LABEL: clpeak_imad_pat_u24:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-SDAG-LABEL: clpeak_imad_pat_i8:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-GISEL-LABEL: clpeak_imad_pat_i8:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i8:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u16 v1, v1, 1
; GFX10-SDAG-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1
; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i8:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v4, v4, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_i64:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3
; GFX10-GISEL-LABEL: clpeak_imad_pat_i64:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_co_u32 v6, vcc_lo, v0, 1
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v2, 0
; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i64:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1
; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i64:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v0, 1
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: v_add_co_u32 v12, vcc_lo, v2, 1
; GFX10-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
; GFX10-SDAG: ; %bb.0: ; %bb
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX10-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
; GFX10-GISEL: ; %bb.0: ; %bb
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1
; GFX10-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
; GFX10-SDAG: ; %bb.0: ; %bb
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX10-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
; GFX10-GISEL: ; %bb.0: ; %bb
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1
; GFX10-SDAG-LABEL: clpeak_imad_pat_i32_x2:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX10-GISEL-LABEL: clpeak_imad_pat_i32_x2:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
; GFX10-GISEL-NEXT: v_add3_u32 v0, v0, v2, 1
; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v1
; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GFX10-SDAG-LABEL: clpeak_imad_pat_i16_x2:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-GISEL-LABEL: clpeak_imad_pat_i16_x2:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-SDAG-LABEL: clpeak_umad_pat_i16_x2:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
; GFX10-GISEL-LABEL: clpeak_umad_pat_i16_x2:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1
; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2
; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v2, v1
; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, v2
; GFX10-LABEL: multi_use_mul_mad_i32_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
; GFX10-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
; GFX10-LABEL: other_use_mul_mad_i32_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX10-NEXT: ds_write_b32 v3, v1
; GFX10-SDAG-LABEL: other_use_mul_mad_i16_var:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX10-SDAG-NEXT: ds_write_b16 v3, v4
; GFX10-GISEL-LABEL: other_use_mul_mad_i16_var:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
; GFX10-GISEL-NEXT: ds_write_b16 v3, v1
; GFX10-LABEL: multi_use_mul_mad_v2i16_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
; GFX10-NEXT: v_pk_add_u16 v0, v1, v2
; GFX10-NEXT: v_pk_add_u16 v1, v1, v3
; GFX10-LABEL: other_use_mul_mad_v2i16_var:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
; GFX10-NEXT: v_pk_add_u16 v0, v1, v2
; GFX10-NEXT: ds_write_b32 v3, v1
; CHECK-LABEL: known_nnan_extract_vector_elt:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 4, v2
; CHECK-NEXT: v_add_f16_e32 v2, 1.0, v3
; CHECK-LABEL: fma_not_fmaxnm_maybe_nan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v0
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v1
; CHECK-NEXT: v_fmaak_f32 v0, v1, v0, 0xff800000
; CHECK-LABEL: use_module:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: ds_write_b16 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-LABEL: use_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
; CHECK-LABEL: use_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
; GFX10-LABEL: v_fma:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_legacy_f32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c)
; GFX10-LABEL: v_fma_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_legacy_f32 v0, 0x41200000, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c)
; GFX10-LABEL: v_fabs_fma:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_legacy_f32 v0, |v0|, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fabs_fma:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, |v0|, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.a = call float @llvm.fabs.f32(float %a)
; GFX10-LABEL: v_fneg_fabs_fma:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_legacy_f32 v0, v0, -|v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_fabs_fma:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, -|v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.b = call float @llvm.fabs.f32(float %b)
; GFX10-LABEL: v_fneg_fma:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_legacy_f32 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_fma:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, -v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.c = fneg float %c
; GFX10-LABEL: v_fma_const_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_legacy_f32 v0, v0, 2.0, -1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_const_const:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, 2.0, -1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 2.0, float -1.0)
; GFX10-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v6, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_1d_tfe:
; GFX11-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v6, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v6, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_1d_lwe:
; GFX11-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v6, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v7, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2d_tfe:
; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v7, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v8, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_3d_tfe_lwe:
; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v8, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v8, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_cube_lwe:
; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v8, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v7, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_1darray_tfe:
; GFX11-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v7, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v8, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darray_lwe:
; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v8, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v8, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2dmsaa_both:
; GFX11-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v8, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v9, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_tfe:
; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v9, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v7, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_mip_1d_lwe:
; GFX11-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v7, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
; GFX10-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v8, v4, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_mip_2d_tfe:
; GFX11-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v8, v4, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v5, v3, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_1d_tfe_V4_dmask3:
; GFX11-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v5, v3, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v4, v2, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_1d_tfe_V4_dmask2:
; GFX11-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v4, v2, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v3, v1, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_1d_tfe_V4_dmask1:
; GFX11-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v3, v1, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v3, v1, s[8:9]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_1d_tfe_V2_dmask1:
; GFX11-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v3, v1, s[8:9]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: global_store_dword v4, v3, s[12:13]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: image_sample_2d_f16_tfe:
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: global_store_b32 v4, v3, s[12:13]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%tex = call {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe:
; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_lwe:
; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v9
; GFX10-NEXT: global_store_dword v11, v10, s[12:13]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX11-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v11, v1, s[12:13]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_mov_b32_e32 v0, v6
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <4 x i32>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_mov_b32_e32 v0, v6
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <4 x float>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT: v_mov_b32_e32 v0, v5
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <3 x i32>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT: v_mov_b32_e32 v0, v5
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <3 x float>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: v_mov_b32_e32 v0, v4
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <2 x i32>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: v_mov_b32_e32 v0, v4
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <2 x float>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { i32, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { float, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_mov_b32_e32 v0, v6
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <4 x i32>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT: v_mov_b32_e32 v0, v6
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <4 x float>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT: v_mov_b32_e32 v0, v5
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <3 x i32>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT: v_mov_b32_e32 v0, v5
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <3 x float>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: v_mov_b32_e32 v0, v4
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <2 x i32>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: v_mov_b32_e32 v0, v4
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { <2 x float>, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { i32, i32 } %load, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
%data = extractvalue { float, i32 } %load, 0
; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_us:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_su:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x16,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_us_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_su_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot4_ss_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot4_i32_iu8 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x16,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_us:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_su:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] ; encoding: [0x00,0x40,0x18,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x1c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_us_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[0,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x5c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_su_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,0,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x3c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_llvm_amdgcn_sudot8_ss_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_dot8_i32_iu4 v0, v0, v1, v2 neg_lo:[1,1,0] clamp ; encoding: [0x00,0xc0,0x18,0xcc,0x00,0x03,0x0a,0x7c]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
entry:
; GFX11-LABEL: test_frexp_f16_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v1, v0
; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: test_frexp_f16_i32_only_use_fract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_frexp_f16_i32_only_use_exp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-LABEL: test_frexp_v2f16_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_frexp_mant_f16_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0
; GFX11-LABEL: test_frexp_v2f16_v2i32_only_use_fract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: test_frexp_v2f16_v2i32_only_use_exp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-LABEL: test_frexp_f16_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f16_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: test_frexp_f16_i16_only_use_fract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_frexp_f16_i16_only_use_exp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_frexp_f32_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: test_frexp_f32_i32_only_use_fract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_frexp_f32_i32_only_use_exp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_frexp_v2f32_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f32_e32 v4, v0
; GFX11-NEXT: v_frexp_mant_f32_e32 v5, v1
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v2, v0
; GFX11-LABEL: test_frexp_v2f32_v2i32_only_use_fract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f32_e32 v0, v0
; GFX11-NEXT: v_frexp_mant_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: test_frexp_v2f32_v2i32_only_use_exp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: test_frexp_f64_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1]
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: test_frexp_f64_i32_only_use_fract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call { double, i32 } @llvm.frexp.f64.i32(double %a)
; GFX11-LABEL: test_frexp_f64_i32_only_use_exp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call { double, i32 } @llvm.frexp.f64.i32(double %a)
; GFX11-LABEL: test_frexp_v2f64_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f64_e32 v[8:9], v[0:1]
; GFX11-NEXT: v_frexp_mant_f64_e32 v[6:7], v[2:3]
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1]
; GFX11-LABEL: test_frexp_v2f64_v2i32_only_use_fract:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: v_frexp_mant_f64_e32 v[2:3], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: test_frexp_v2f64_v2i32_only_use_exp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1]
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: zeromask_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: zeromask_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 0)
; GFX10SELDAG-LABEL: allflags_f16:
; GFX10SELDAG: ; %bb.0:
; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 1
; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10GLISEL-LABEL: allflags_f16:
; GFX10GLISEL: ; %bb.0:
; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, -1
; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-LABEL: allflags_f16:
; GFX11SELDAG: ; %bb.0:
; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 1
; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-LABEL: allflags_f16:
; GFX11GLISEL: ; %bb.0:
; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, -1
; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1023) ; 0x3ff
; GFX10CHECK-LABEL: snan_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 1
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: snan_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 1
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: qnan_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 2
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: qnan_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 2
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: posinf_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x200
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: posinf_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x200
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: neginf_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 4
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: neginf_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 4
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: posnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x100
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: posnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x100
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: negnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 8
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: negnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 8
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: possubnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x80
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: possubnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x80
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: negsubnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 16
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: negsubnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 16
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: poszero_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 64
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: poszero_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 64
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: negzero_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 32
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: negzero_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 32
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: posfinite_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1c0
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: posfinite_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1c0
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: negfinite_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 56
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: negfinite_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 56
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isnan_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnan_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_isnan_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3fc
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_isnan_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3fc
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isnan_v2f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 3
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
; GFX11CHECK-LABEL: isnan_v2f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10SELDAG-LABEL: isnan_v3f16:
; GFX10SELDAG: ; %bb.0:
; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
; GFX10GLISEL-LABEL: isnan_v3f16:
; GFX10GLISEL: ; %bb.0:
; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10GLISEL-NEXT: v_mov_b32_e32 v2, 3
; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3
; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4
; GFX11SELDAG-LABEL: isnan_v3f16:
; GFX11SELDAG: ; %bb.0:
; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-LABEL: isnan_v3f16:
; GFX11GLISEL: ; %bb.0:
; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3
; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10SELDAG-LABEL: isnan_v4f16:
; GFX10SELDAG: ; %bb.0:
; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10SELDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10SELDAG-NEXT: v_mov_b32_e32 v2, 3
; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v0, 3
; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD
; GFX10GLISEL-LABEL: isnan_v4f16:
; GFX10GLISEL: ; %bb.0:
; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10GLISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10GLISEL-NEXT: v_mov_b32_e32 v3, 3
; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3
; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4
; GFX11CHECK-LABEL: isnan_v4f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX10CHECK-LABEL: isnan_f16_strictfp:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnan_f16_strictfp:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isinf_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x204
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isinf_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isfinite_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1f8
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isfinite_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1f8
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: issubnormal_or_zero_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0xf0
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: issubnormal_or_zero_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0xf0
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_issubnormal_or_zero_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x30f
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_issubnormal_or_zero_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x30f
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x108
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x108
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_isnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x2f7
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_isnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x2f7
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_is_plus_normal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x2ff
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_is_plus_normal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x2ff
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_is_neg_normal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3f7
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_is_neg_normal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3f7
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: issubnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x90
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: issubnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x90
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_issubnormal_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x36f
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_issubnormal_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x36f
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: iszero_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x60
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: iszero_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x60
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_iszero_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39f
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_iszero_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39f
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: ispositive_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3c0
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: ispositive_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3c0
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_ispositive_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 63
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_ispositive_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 63
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isnegative_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 60
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnegative_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 60
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_isnegative_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x3c3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_isnegative_f16:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x3c3
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: iszero_or_nan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x63
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: iszero_or_nan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x63
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: iszero_or_nan_f_daz:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x63
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: iszero_or_nan_f_daz:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x63
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: iszero_or_nan_f_maybe_daz:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x63
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: iszero_or_nan_f_maybe_daz:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x63
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_iszero_or_nan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39c
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_iszero_or_nan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39c
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_iszero_or_nan_f_daz:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39c
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_iszero_or_nan_f_daz:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39c
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39c
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39c
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: iszero_or_qnan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x62
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: iszero_or_qnan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x62
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: iszero_or_snan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x61
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: iszero_or_snan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x61
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_iszero_or_qnan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39d
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_iszero_or_qnan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39d
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_iszero_or_snan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x39e
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_iszero_or_snan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x39e
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isinf_or_nan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x207
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isinf_or_nan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x207
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_isinf_or_nan_f16:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1f8
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_isinf_or_nan_f16:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1f8
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isfinite_or_nan_f:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x1fb
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isfinite_or_nan_f:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x1fb
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: not_isfinite_or_nan_f:
; GFX10CHECK: ; %bb.0: ; %entry
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 0x204
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: not_isfinite_or_nan_f:
; GFX11CHECK: ; %bb.0: ; %entry
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX10CHECK-LABEL: isnan_f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnan_f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v2f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v2f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v3f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v3f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v4f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v4f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v5f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v5f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v6f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v6f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v7f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v7f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v8f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v8f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_v16f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v1, 3
; GFX11CHECK-LABEL: isnan_v16f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_f64:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnan_f64:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_f32_strictfp:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnan_f32_strictfp:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnan_f64_strictfp:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnan_f64_strictfp:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 3
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isinf_f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x204
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isinf_f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isinf_f64:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x204
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isinf_f64:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isfinite_f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x1f8
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isfinite_f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x1f8
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isfinite_f64:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x1f8
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isfinite_f64:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x1f8
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnormal_f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x108
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: isnormal_f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x108
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: isnormal_v2f64:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[0:1], 0x108
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s4, v[2:3], 0x108
; GFX11CHECK-LABEL: isnormal_v2f64:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x108
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: issubnormal_f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x90
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: issubnormal_f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x90
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10CHECK-LABEL: iszero_f32:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s4, v0, 0x60
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
; GFX11CHECK-LABEL: iszero_f32:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s0, v0, 0x60
; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-LABEL: test_ldexp_f32_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.ldexp.f32.i32(float %a, i32 %b)
; GFX11-LABEL: test_ldexp_v2f32_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v4
; GFX11-NEXT: v_ldexp_f32 v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: test_ldexp_v3f32_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v5
; GFX11-NEXT: v_ldexp_f32 v1, v3, v6
; GFX11-NEXT: v_ldexp_f32 v2, v4, v7
; GFX11-LABEL: test_ldexp_v4f32_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v6
; GFX11-NEXT: v_ldexp_f32 v1, v3, v7
; GFX11-NEXT: v_ldexp_f32 v2, v4, v8
}
define double @test_ldexp_f64_i32(double %a, i32 %b) {
-; GFX6-LABEL: test_ldexp_f64_i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: test_ldexp_f64_i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_ldexp_f64_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_ldexp_f64_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: test_ldexp_f64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.ldexp.f64.i32(double %a, i32 %b)
ret double %result
}
define <2 x double> @test_ldexp_v2f64_v2i32(<2 x double> %a, <2 x i32> %b) {
-; GFX6-LABEL: test_ldexp_v2f64_v2i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX6-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: test_ldexp_v2f64_v2i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX8-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_ldexp_v2f64_v2i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_ldexp_v2f64_v2i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: test_ldexp_v2f64_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; GCN-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %a, <2 x i32> %b)
ret <2 x double> %result
}
; GFX11-SDAG-LABEL: test_ldexp_f16_i8:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-LABEL: test_ldexp_f16_i8:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: test_ldexp_f16_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0
attributes #0 = { nounwind readnone }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
; GFX1100-SDAG-LABEL: v_log_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_log_fabs_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log_fneg_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_log_fneg_fabs_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log_fneg_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_fneg_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log_f32_fast:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_fast:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-LABEL: v_log_f32_unsafe_math_attr:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-LABEL: v_log_f32_approx_fn_attr:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_log_f32_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_afn:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-LABEL: v_log_f32_afn_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_log_f32_afn_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_fabs_log_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e64 v0, |v0|
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-LABEL: v_log_f32_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_log_f32_nnan:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_nnan:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log_f32_nnan_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_log_f32_nnan_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_nnan_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log_f32_ninf_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_log_f32_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_ninf_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log_f32_nnan_ninf_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log_f32_fast_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-LABEL: v_log_f32_dynamic_mode:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_dynamic_mode:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log_f32_undef:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-LABEL: v_log_f32_undef:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, s0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
; GFX1100-SDAG-LABEL: v_log_f32_0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0
; GFX1100-GISEL-LABEL: v_log_f32_0:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, 0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
; GFX1100-LABEL: v_log_f32_from_fpext_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-LABEL: v_log_f32_from_fpext_math_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_bf16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_bf16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-LABEL: v_log_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
; GFX1100-LABEL: v_log_fabs_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, |v0|
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
; GFX1100-LABEL: v_log_fneg_fabs_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, -|v0|
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
; GFX1100-LABEL: v_log_fneg_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, -v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
; GFX1100-LABEL: v_log_f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
; GFX1100-LABEL: v_log_v2f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-SDAG-LABEL: v_log_fabs_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log_fabs_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-LABEL: v_log_fneg_fabs_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log_fneg_fabs_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-LABEL: v_log_fneg_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log_fneg_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-LABEL: v_log_v2f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-LABEL: v_log_v3f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: v_log_f16_e32 v1, v1
; GFX1100-LABEL: v_log_v3f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: v_log_f16_e32 v1, v1
; GFX1100-SDAG-LABEL: v_log_v4f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
; GFX1100-GISEL-LABEL: v_log_v4f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log_v4f16_fast:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
; GFX1100-GISEL-LABEL: v_log_v4f16_fast:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log10_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log10_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_log10_fabs_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log10_fneg_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_log10_fneg_fabs_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log10_fneg_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_fneg_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log10_f32_fast:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_fast:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-LABEL: v_log10_f32_unsafe_math_attr:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-LABEL: v_log10_f32_approx_fn_attr:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_log10_f32_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log10_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_afn:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-LABEL: v_log10_f32_afn_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_log10_f32_afn_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_fabs_log10_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_log_f32_e64 v0, |v0|
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-LABEL: v_log10_f32_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_log10_f32_nnan:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_nnan:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log10_f32_nnan_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_log10_f32_nnan_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_nnan_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log10_f32_ninf_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_log10_f32_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_ninf_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log10_f32_nnan_ninf_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log10_f32_fast_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-LABEL: v_log10_f32_dynamic_mode:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_dynamic_mode:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log10_f32_undef:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-LABEL: v_log10_f32_undef:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, s0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
; GFX1100-SDAG-LABEL: v_log10_f32_0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0
; GFX1100-GISEL-LABEL: v_log10_f32_0:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, 0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
; GFX1100-LABEL: v_log10_f32_from_fpext_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-LABEL: v_log10_f32_from_fpext_math_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_bf16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_bf16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-LABEL: v_log10_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
; GFX1100-LABEL: v_log10_fabs_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, |v0|
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
; GFX1100-LABEL: v_log10_fneg_fabs_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, -|v0|
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
; GFX1100-LABEL: v_log10_fneg_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, -v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
; GFX1100-LABEL: v_log10_f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
; GFX1100-LABEL: v_log10_v2f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-SDAG-LABEL: v_log10_fabs_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log10_fabs_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-LABEL: v_log10_fneg_fabs_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log10_fneg_fabs_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-LABEL: v_log10_fneg_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log10_fneg_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-LABEL: v_log10_v2f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1100-LABEL: v_log10_v3f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: v_log_f16_e32 v1, v1
; GFX1100-LABEL: v_log10_v3f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: v_log_f16_e32 v1, v1
; GFX1100-SDAG-LABEL: v_log10_v4f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
; GFX1100-GISEL-LABEL: v_log10_v4f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log10_v4f16_fast:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
; GFX1100-GISEL-LABEL: v_log10_v4f16_fast:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log2_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log2_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_log2_fabs_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-SDAG-LABEL: v_log2_fneg_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_fneg_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-LABEL: v_log2_f32_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_unsafe_math_attr:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_approx_fn_attr:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log2_f32_afn:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_afn_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_afn_dynamic:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_fabs_log2_f32_afn:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e64 v0, |v0|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_nnan:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log2_f32_nnan_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_nnan_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log2_f32_ninf_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_ninf_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_nnan_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log2_f32_nnan_ninf_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_log2_f32_fast_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_dynamic_mode:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_dynamic_mode:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-LABEL: v_log2_f32_undef:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_undef:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, s0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
; GFX1100-SDAG-LABEL: v_log2_f32_0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0
; GFX1100-GISEL-LABEL: v_log2_f32_0:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_from_fpext_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-LABEL: v_log2_f32_from_fpext_math_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log2_f32_from_fpext_bf16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-LABEL: v_log2_f32_from_fpext_bf16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-LABEL: v_log2_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_fabs_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, |v0|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_fneg_fabs_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, -|v0|
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_fneg_f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e64 v0, -v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_v2f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-LABEL: v_log2_fabs_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log2_fabs_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-LABEL: v_log2_fneg_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-LABEL: v_log2_fneg_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-LABEL: v_log2_v2f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-LABEL: v_log2_v3f16:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: v_log_f16_e32 v1, v1
; GFX1100-LABEL: v_log2_v3f16_fast:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-NEXT: v_log_f16_e32 v0, v0
; GFX1100-NEXT: v_log_f16_e32 v1, v1
; GFX1100-SDAG-LABEL: v_log2_v4f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
; GFX1100-GISEL-LABEL: v_log2_v4f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
; GFX1100-SDAG-LABEL: v_log2_v4f16_fast:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
; GFX1100-GISEL-LABEL: v_log2_v4f16_fast:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
; GFX10-LABEL: umulo_i64_v_v:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: v_mov_b32_e32 v5, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0
; GFX11-LABEL: umulo_i64_v_v:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0
; GFX10-LABEL: smulo_i64_v_v:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: v_mov_b32_e32 v5, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0
; GFX11-LABEL: smulo_i64_v_v:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0
; GFX10-LABEL: smulo_i64_v_4:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
; GFX10-NEXT: v_alignbit_b32 v3, v1, v0, 30
; GFX10-NEXT: v_ashrrev_i64 v[5:6], 2, v[4:5]
; GFX11-LABEL: smulo_i64_v_4:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
; GFX11-NEXT: v_alignbit_b32 v3, v1, v0, 30
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-LABEL: umulo_i64_v_4:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v7, 0x3fffffff, v1
; GFX10-NEXT: v_mov_b32_e32 v6, v0
; GFX10-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
; GFX11-LABEL: umulo_i64_v_4:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_and_b32 v7, 0x3fffffff, v1
; GFX11-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
; GFX11-NEXT: v_alignbit_b32 v3, v1, v0, 30
; GFX11-LABEL: v_powi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: v_powi_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-LABEL: v_powi_0_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 0)
; GFX11-LABEL: v_powi_1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 1)
ret float %res
; GFX11-LABEL: v_powi_neg1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_rcp_f32_e32 v2, v1
; GFX11-LABEL: v_powi_2_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 2)
; GFX11-LABEL: v_powi_neg2_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-LABEL: v_powi_4_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-LABEL: v_powi_8_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-LABEL: v_powi_16_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-LABEL: v_powi_128_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-LABEL: v_powi_neg128_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
# RUN: llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: {{^}}lo_to_lo:
# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
# GCN-LABEL: {{^}}lo_to_lo_samereg:
# GCN: s_waitcnt
-# GFX10-NEXT: s_waitcnt_vscnt
# GCN-NEXT: s_endpgm
name: lo_to_lo_samereg
tracksRegLiveness: true
# GCN-LABEL: {{^}}hi_to_hi_samereg:
# GCN: s_waitcnt
-# GFX10-NEXT: s_waitcnt_vscnt
# GCN-NEXT: s_endpgm
name: hi_to_hi_samereg
tracksRegLiveness: true
# NB: copy of undef just killed instead of expansion
# GCN-LABEL: {{^}}lo_to_lo_undef:
# GCN: s_waitcnt
-# GFX10-NEXT: s_waitcnt_vscnt
# GCN-NEXT: v_mov_b32_e32 v2, v1
# GCN-NEXT: s_endpgm
name: lo_to_lo_undef
; GFX10-LABEL: load_lds_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b128 v[0:3], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b128 v[0:3], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v4i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u8 v1, v0
; GFX10-NEXT: ds_read_u8 v2, v0 offset:1
; GFX10-NEXT: ds_read_u8 v3, v0 offset:2
; GFX11-LABEL: load_lds_v4i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u8 v1, v0
; GFX11-NEXT: ds_load_u8 v2, v0 offset:1
; GFX11-NEXT: ds_load_u8 v3, v0 offset:2
; GFX10-LABEL: load_lds_v4i32_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u16 v1, v0
; GFX10-NEXT: ds_read_u16 v2, v0 offset:2
; GFX10-NEXT: ds_read_u16 v3, v0 offset:4
; GFX11-LABEL: load_lds_v4i32_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16 v1, v0
; GFX11-NEXT: ds_load_u16 v2, v0 offset:2
; GFX11-NEXT: ds_load_u16 v3, v0 offset:4
; GFX10-LABEL: load_lds_v4i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX11-LABEL: load_lds_v4i32_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3
; GFX10-LABEL: load_lds_v4i32_align8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read2_b64 v[0:3], v0 offset1:1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v4i32_align8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_2addr_b64 v[0:3], v0 offset1:1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v4i32_align16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b128 v[0:3], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v4i32_align16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b128 v[0:3], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b96 v[0:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b96 v[0:2], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: load_lds_v3i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u8 v1, v0
; GFX10-NEXT: ds_read_u8 v2, v0 offset:1
; GFX10-NEXT: ds_read_u8 v3, v0 offset:2
; GFX11-LABEL: load_lds_v3i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u8 v1, v0
; GFX11-NEXT: ds_load_u8 v2, v0 offset:1
; GFX11-NEXT: ds_load_u8 v3, v0 offset:2
; GFX10-LABEL: load_lds_v3i32_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_u16 v1, v0
; GFX10-NEXT: ds_read_u16 v2, v0 offset:2
; GFX10-NEXT: ds_read_u16 v3, v0 offset:4
; GFX11-LABEL: load_lds_v3i32_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_u16 v1, v0
; GFX11-NEXT: ds_load_u16 v2, v0 offset:2
; GFX11-NEXT: ds_load_u16 v3, v0 offset:4
; GFX10-LABEL: load_lds_v3i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
; GFX11-LABEL: load_lds_v3i32_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
; GFX10-LABEL: load_lds_v3i32_align8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read_b64 v[0:1], v0
; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
; GFX11-LABEL: load_lds_v3i32_align8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_b64 v[0:1], v0
; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
; GFX10-LABEL: load_lds_v3i32_align16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b96 v[0:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: load_lds_v3i32_align16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_load_b96 v[0:2], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX11-NEXT: v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX11-NEXT: global_store_b16 v[0:1], v3, off dlc
; GFX1100-LABEL: mixlo_simple:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX1100-LABEL: v_mad_mix_v2f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX1100-LABEL: v_mad_mix_v3f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-LABEL: v_mad_mix_v4f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GFX1100-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v4, v3
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX1100-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_v2f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 1.0
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0.15915494
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x3e230000
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x367c0000
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x367c0000
; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: s_mov_b32 s0, 1.0
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
; GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: s_mov_b32 s0, 0x3e230000
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
; GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: s_mov_b32 s0, 0.15915494
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: no_mix_simple:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: no_mix_simple_fabs:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX1100-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mad_u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mad_u16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%mul = mul i16 %arg0, %arg1
; GFX10-LABEL: v_mad_u16_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_mad_u16_zext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-LABEL: v_mad_u16_zext64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-LABEL: v_mad_u16_zext64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-LABEL: mad_i64_i32_sextops:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
; GFX11-LABEL: mad_i64_i32_sextops_commute:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
; GFX11-LABEL: mad_u64_u32_zextops:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX11-LABEL: mad_u64_u32_zextops_commute:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX11-LABEL: mad_i64_i32_sextops_i32_i128:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0
; GFX11-NEXT: v_mov_b32_e32 v8, 0
; GFX11-NEXT: v_ashrrev_i32_e32 v14, 31, v0
; GFX11-LABEL: mad_i64_i32_sextops_i32_i63:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
; GFX11-LABEL: mad_i64_i32_sextops_i31_i63:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_bfe_i32 v4, v1, 0, 31
; GFX11-NEXT: v_bfe_i32 v5, v0, 0, 31
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: mad_i64_i32_extops_i32_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
; GFX11-LABEL: mad_u64_u32_bitops:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5]
; GFX11-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v0
; GFX11-NEXT: v_mov_b32_e32 v6, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v6, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
; GFX11-LABEL: mad_i64_i32_bitops:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5]
; GFX11-LABEL: mad_i64_i32_unpack_i64ops:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX11-LABEL: mad_i64_i32_twice:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3]
; GFX11-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-LABEL: mad_i64_i32_thrice:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_i64_i32 v[8:9], null, v0, v1, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v8, v2
; GFX11-LABEL: mad_i64_i32_secondary_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mad_i64_i32 v[4:5], null, v0, v1, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
; GFX11-LABEL: mad_i48_i48:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v7, v2, v[4:5]
; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX10-NEXT: flat_store_dword v[0:1], v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vvv_multiuse:
; GFX11-NEXT: v_add_nc_u32_e32 v0, v1, v2
; GFX11-NEXT: flat_store_b32 v[0:1], v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ; return to shader part epilog
%mul = mul i32 %a, %b
%add = add i32 %mul, %c
; GCN-SCRATCH-LABEL: mubuf_clause:
; GCN-SCRATCH: ; %bb.0: ; %bb
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v2, 4, v31
; GCN-SCRATCH-NEXT: v_and_b32_e32 v18, 0x3ff0, v2
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v18, v[10:13], off
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v19, v[14:17], off
-; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31]
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
; GCN-SCRATCH-LABEL: load_global_d16_hi:
; GCN-SCRATCH: ; %bb.0: ; %entry
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-SCRATCH-NEXT: v_mov_b32_e32 v5, v2
; GCN-SCRATCH-NEXT: s_clause 0x1
; GCN-SCRATCH-NEXT: global_load_short_d16_hi v5, v[0:1], off
; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v5, off
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v2, off offset:128
-; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31]
entry:
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 32
; GCN-SCRATCH-LABEL: load_global_d16_lo:
; GCN-SCRATCH: ; %bb.0: ; %entry
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-SCRATCH-NEXT: v_mov_b32_e32 v5, v2
; GCN-SCRATCH-NEXT: s_clause 0x1
; GCN-SCRATCH-NEXT: global_load_short_d16 v5, v[0:1], off
; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v5, off
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GCN-SCRATCH-NEXT: global_store_dword v[3:4], v2, off offset:128
-; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31]
entry:
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 32
; GFX11-LABEL: test_minmax_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_minmax_commuted_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_maxmin_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_maxmin_commuted_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_smed3_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_med3_i32 v2, v2, v3, v4
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp0 = call i32 @llvm.smin.i32(i32 %x, i32 %y)
%tmp1 = call i32 @llvm.smax.i32(i32 %x, i32 %y)
; GFX11-LABEL: test_minmax_u32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_minmax_commuted_u32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_maxmin_u32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_maxmin_commuted_u32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
; GFX11-LABEL: test_umed3_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_med3_u32 v2, v2, v3, v4
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp0 = call i32 @llvm.umin.i32(i32 %x, i32 %y)
%tmp1 = call i32 @llvm.umax.i32(i32 %x, i32 %y)
; SDAG-LABEL: test_minmax_f32_ieee_true:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-NEXT: v_max_f32_e32 v2, v2, v2
; SDAG-NEXT: v_maxmin_f32 v0, v0, v1, v2
; GISEL-LABEL: test_minmax_f32_ieee_true:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-NEXT: v_max_f32_e32 v2, v2, v2
; GISEL-NEXT: v_maxmin_f32 v0, v0, v1, v2
; SDAG-LABEL: test_maxmin_f32_ieee_true:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-NEXT: v_max_f32_e32 v2, v2, v2
; SDAG-NEXT: v_minmax_f32 v0, v0, v1, v2
; GISEL-LABEL: test_maxmin_f32_ieee_true:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-NEXT: v_max_f32_e32 v2, v2, v2
; GISEL-NEXT: v_minmax_f32 v0, v0, v1, v2
; GFX11-LABEL: test_med3_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_med3_f32 v2, v2, v3, v4
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp0 = call float @llvm.minnum.f32(float %x, float %y)
%tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
; SDAG-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-NEXT: v_max_f16_e32 v1, v1, v1
; SDAG-NEXT: v_max_f16_e32 v0, v0, v0
; SDAG-NEXT: v_max_f16_e32 v2, v2, v2
; GISEL-LABEL: test_minmax_commuted_f16_ieee_true:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-NEXT: v_max_f16_e32 v0, v0, v0
; GISEL-NEXT: v_max_f16_e32 v1, v1, v1
; GISEL-NEXT: v_max_f16_e32 v2, v2, v2
; SDAG-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-NEXT: v_max_f16_e32 v1, v1, v1
; SDAG-NEXT: v_max_f16_e32 v0, v0, v0
; SDAG-NEXT: v_max_f16_e32 v2, v2, v2
; GISEL-LABEL: test_maxmin_commuted_f16_ieee_true:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-NEXT: v_max_f16_e32 v0, v0, v0
; GISEL-NEXT: v_max_f16_e32 v1, v1, v1
; GISEL-NEXT: v_max_f16_e32 v2, v2, v2
; GFX11-LABEL: test_med3_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_med3_f16 v2, v2, v3, v4
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%tmp0 = call half @llvm.minnum.f16(half %x, half %y)
%tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
; GFX10-LABEL: nonkernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
; G_GFX10-LABEL: nonkernel:
; G_GFX10: ; %bb.0:
; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; G_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; G_GFX10-NEXT: v_mov_b32_e32 v2, 0
; G_GFX10-NEXT: v_mov_b32_e32 v3, 8
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX1010_W32-LABEL: mubuf_vgpr:
; GFX1010_W32: ; %bb.0:
; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo
; GFX1010_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0
; GFX1010_W64-LABEL: mubuf_vgpr:
; GFX1010_W64: ; %bb.0:
; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec
; GFX1010_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0
; GFX1100_W32-LABEL: mubuf_vgpr:
; GFX1100_W32: ; %bb.0:
; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo
; GFX1100_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0
; GFX1100_W64-LABEL: mubuf_vgpr:
; GFX1100_W64: ; %bb.0:
; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec
; GFX1100_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0
; GFX1010_W32-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1010_W32: ; %bb.0: ; %entry
; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo
; GFX1010_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0
; GFX1010_W64-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1010_W64: ; %bb.0: ; %entry
; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec
; GFX1010_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0
; GFX1100_W32-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1100_W32: ; %bb.0: ; %entry
; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo
; GFX1100_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0
; GFX1100_W64-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1100_W64: ; %bb.0: ; %entry
; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec
; GFX1100_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0
; GFX1010_W32-LABEL: mubuf_vgpr_outside_entry:
; GFX1010_W32: ; %bb.0: ; %entry
; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W32-NEXT: ;;#ASMSTART
; GFX1010_W32-NEXT: s_mov_b32 s4, 17
; GFX1010_W32-NEXT: ;;#ASMEND
; GFX1010_W64-LABEL: mubuf_vgpr_outside_entry:
; GFX1010_W64: ; %bb.0: ; %entry
; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W64-NEXT: ;;#ASMSTART
; GFX1010_W64-NEXT: s_mov_b32 s4, 17
; GFX1010_W64-NEXT: ;;#ASMEND
; GFX1100_W32-LABEL: mubuf_vgpr_outside_entry:
; GFX1100_W32: ; %bb.0: ; %entry
; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W32-NEXT: ;;#ASMSTART
; GFX1100_W32-NEXT: s_mov_b32 s4, 17
; GFX1100_W32-NEXT: ;;#ASMEND
; GFX1100_W64-LABEL: mubuf_vgpr_outside_entry:
; GFX1100_W64: ; %bb.0: ; %entry
; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W64-NEXT: ;;#ASMSTART
; GFX1100_W64-NEXT: s_mov_b32 s4, 17
; GFX1100_W64-NEXT: ;;#ASMEND
; GFX1010_W32-LABEL: mubuf_vgpr:
; GFX1010_W32: ; %bb.0:
; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo
; GFX1010_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0
; GFX1010_W64-LABEL: mubuf_vgpr:
; GFX1010_W64: ; %bb.0:
; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec
; GFX1010_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0
; GFX1100_W32-LABEL: mubuf_vgpr:
; GFX1100_W32: ; %bb.0:
; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo
; GFX1100_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0
; GFX1100_W64-LABEL: mubuf_vgpr:
; GFX1100_W64: ; %bb.0:
; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec
; GFX1100_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0
; GFX1010_W32-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1010_W32: ; %bb.0: ; %entry
; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo
; GFX1010_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0
; GFX1010_W64-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1010_W64: ; %bb.0: ; %entry
; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec
; GFX1010_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0
; GFX1100_W32-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1100_W32: ; %bb.0: ; %entry
; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo
; GFX1100_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0
; GFX1100_W64-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX1100_W64: ; %bb.0: ; %entry
; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec
; GFX1100_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0
; GFX1010_W32-LABEL: mubuf_vgpr_outside_entry:
; GFX1010_W32: ; %bb.0: ; %entry
; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W32-NEXT: ;;#ASMSTART
; GFX1010_W32-NEXT: s_mov_b32 s4, 17
; GFX1010_W32-NEXT: ;;#ASMEND
; GFX1010_W64-LABEL: mubuf_vgpr_outside_entry:
; GFX1010_W64: ; %bb.0: ; %entry
; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010_W64-NEXT: ;;#ASMSTART
; GFX1010_W64-NEXT: s_mov_b32 s4, 17
; GFX1010_W64-NEXT: ;;#ASMEND
; GFX1100_W32-LABEL: mubuf_vgpr_outside_entry:
; GFX1100_W32: ; %bb.0: ; %entry
; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W32-NEXT: ;;#ASMSTART
; GFX1100_W32-NEXT: s_mov_b32 s4, 17
; GFX1100_W32-NEXT: ;;#ASMEND
; GFX1100_W64-LABEL: mubuf_vgpr_outside_entry:
; GFX1100_W64: ; %bb.0: ; %entry
; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1100_W64-NEXT: ;;#ASMSTART
; GFX1100_W64-NEXT: s_mov_b32 s4, 17
; GFX1100_W64-NEXT: ;;#ASMEND
; GFX10-LABEL: flat_inst_valu_offset_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-LABEL: flat_inst_valu_offset_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-SDAG-LABEL: flat_inst_valu_offset_11bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-LABEL: flat_inst_valu_offset_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x7ff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-SDAG-LABEL: flat_inst_valu_offset_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-LABEL: flat_inst_valu_offset_12bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-SDAG-LABEL: flat_inst_valu_offset_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
; GFX10-GISEL-LABEL: flat_inst_valu_offset_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-GISEL-LABEL: flat_inst_valu_offset_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_11bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_11bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf800
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf800
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_12bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_11bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x3fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xc000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xc000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-LABEL: global_inst_valu_offset_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_inst_valu_offset_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_inst_valu_offset_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_inst_valu_offset_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: global_inst_valu_offset_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-LABEL: global_inst_valu_offset_12bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-SDAG-LABEL: global_inst_valu_offset_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX10-SDAG-LABEL: global_inst_valu_offset_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX11-SDAG-LABEL: global_inst_valu_offset_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2048
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-SDAG-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x3fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xc000
; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xc000
; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2049
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2048
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
; GFX10-LABEL: shuffle6766:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x6060706
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle6766:
; GFX10-LABEL: shuffle3744:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v6, v[0:1], off
; GFX10-NEXT: global_load_dword v7, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x307
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle3744:
; GFX10-LABEL: shuffle4445:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040404
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle4445:
; GFX10-LABEL: shuffle0101:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040504
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle0101:
; GFX10-LABEL: shuffle1004:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v6, v[0:1], off
; GFX10-NEXT: global_load_dword v7, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x40405
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle1004:
; GFX10-LABEL: shuffle7533:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: flat_load_dword v6, v[0:1]
; GFX10-NEXT: flat_load_dword v7, v[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v7, v6, 0x3030507
; GFX10-NEXT: flat_store_dword v[4:5], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle7533:
; GFX10-LABEL: shuffle7767:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: flat_load_dword v0, v[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060707
; GFX10-NEXT: flat_store_dword v[4:5], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle7767:
; GFX10-LABEL: shuffle0554:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b32 v0, v0
; GFX10-NEXT: ds_read_b32 v1, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-LABEL: shuffle2127:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_read_b32 v0, v0
; GFX10-NEXT: ds_read_b32 v1, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-LABEL: shuffle5047:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
; GFX10-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v4, v3, 0x7040005
; GFX10-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle5047:
; GFX10-LABEL: shuffle3546:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v6, v[0:1], off
; GFX10-NEXT: global_load_dword v7, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x2000107
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle3546:
; GFX10-LABEL: shuffle7330ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x4070706
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle7330ud2:
; GFX10-LABEL: shuffle5341ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040706
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle5341ud2:
; GFX10-LABEL: shuffle6106ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040504
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle6106ud2:
; GFX10-LABEL: shuffle4327ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060706
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle4327ud2:
; GFX10-LABEL: shuffle3263ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060607
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle3263ud2:
; GFX10-LABEL: shuffle2763ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060706
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle2763ud2:
; GFX10-LABEL: shuffle1327ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x7060705
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle1327ud2:
; GFX10-LABEL: shuffle0605ud2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040504
; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shuffle0605ud2:
; GFX10-LABEL: insertUsesOr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: v_lshlrev_b16 v1, 8, v4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: global_store_dword v[5:6], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: insertUsesOr:
; GFX10-LABEL: addUsesOr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v7, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addUsesOr:
; GFX10-LABEL: add:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v7, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: add:
; GFX10-LABEL: add_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: global_store_dword v[5:6], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: add_div:
; GFX10-LABEL: add_store:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v9, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: global_store_dword v[5:6], v1, off
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: add_store:
; GFX10-LABEL: add_store_div_16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: global_store_dword v[5:6], v1, off
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: add_store_div_16:
; GFX10-LABEL: add_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x10705
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: add_store_div:
; GFX10-LABEL: and_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x5070006
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: and_store_div:
; GFX10-LABEL: ashr_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ashr_store_div:
; GFX10-LABEL: bc_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v0, v9, v4, 0x7060104
; GFX10-NEXT: global_store_dword v[7:8], v0, off
; GFX10-NEXT: global_store_dword v[5:6], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: bc_store_div:
; GFX10-LABEL: eve_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v1, v5, v4, 0x1020305
; GFX10-NEXT: global_store_byte v[9:10], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: eve_store_div:
; GFX10-LABEL: ive_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v9, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 2, v9
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v9
; GFX10-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ive_store_div:
; GFX10-LABEL: lhsr_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v1, v9, v4, 0x1030707
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: lhsr_store_div:
; GFX10-LABEL: mul_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x2000504
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: mul_store_div:
; GFX10-LABEL: or_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_perm_b32 v1, v9, v4, 0x2010005
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: or_store_div:
; GFX10-LABEL: sdiv_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: sdiv_store_div:
; GFX10-LABEL: sext_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_perm_b32 v2, v9, v4, 0x3010707
; GFX10-NEXT: global_store_dwordx2 v[7:8], v[0:1], off
; GFX10-NEXT: global_store_dword v[5:6], v2, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: sext_store_div:
; GFX10-LABEL: shl_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: shl_store_div:
; GFX10-LABEL: sitofp_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
; GFX10-NEXT: global_store_dword v[5:6], v4, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: sitofp_store_div:
; GFX10-LABEL: srem_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_perm_b32 v1, v4, v9, 0x2070306
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: srem_store_div:
; GFX10-LABEL: sub_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: sub_store_div:
; GFX10-LABEL: sv_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v4, v5, 0x50705
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: sv_store_div:
; GFX10-LABEL: trunc_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_and_b32_e32 v0, 15, v0
; GFX10-NEXT: global_store_byte v[7:8], v0, off
; GFX10-NEXT: global_store_dword v[5:6], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: trunc_store_div:
; GFX10-LABEL: udiv:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: udiv:
; GFX10-LABEL: uitofp_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5020104
; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
; GFX10-NEXT: global_store_dword v[5:6], v4, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: uitofp_store_div:
; GFX10-LABEL: urem_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
; GFX10-NEXT: global_store_dword v[7:8], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: urem_store_div:
; GFX10-LABEL: xor_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v1, v9, v4, 0x5060307
; GFX10-NEXT: global_store_dword v[5:6], v0, off
; GFX10-NEXT: global_store_dword v[7:8], v1, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: xor_store_div:
; GFX10-LABEL: zext_store_div:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_perm_b32 v1, v3, v10, 0x5040100
; GFX10-NEXT: global_store_dwordx2 v[7:8], v[0:1], off
; GFX10-NEXT: global_store_dword v[5:6], v2, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: zext_store_div:
; GFX10-LABEL: shl_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shl_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = shl i16 %x, %y
; GFX10-LABEL: lshr_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: lshr_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = lshr i16 %x, %y
; GFX10-LABEL: ashr_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: ashr_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = ashr i16 %x, %y
; GFX10-LABEL: add_u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: add_u16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = add i16 %x, %y
; GFX10-LABEL: sub_u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: sub_u16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = sub i16 %x, %y
; GFX10-LABEL: mul_lo_u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_lo_u16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = mul i16 %x, %y
; GFX10-LABEL: min_u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_min_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_u16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_u16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp ule i16 %x, %y
; GFX10-LABEL: min_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_min_i16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_i16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp sle i16 %x, %y
; GFX10-LABEL: max_u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_u16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_u16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp uge i16 %x, %y
; GFX10-LABEL: max_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_i16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_i16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp sge i16 %x, %y
; GFX10-LABEL: shl_i16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: shl_i16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: lshr_i16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: lshr_i16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: ashr_i16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: ashr_i16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: add_u16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: add_u16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_nc_u16 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: sub_u16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: sub_u16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: mul_lo_u16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: mul_lo_u16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: min_u16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_min_u16 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: min_u16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_u16 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: min_i16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_min_i16 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: min_i16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_min_i16 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: max_u16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_u16 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: max_u16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_u16 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: max_i16_zext_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_max_i16 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: max_i16_zext_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_max_i16 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: zext_fadd_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f16_e32 v0, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: zext_fadd_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_f16_e32 v0, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: zext_fma_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: zext_fma_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fmac_f16_e32 v2, v0, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: zext_div_fixup_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_fixup_f16 v0, v0, v1, v2
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: zext_div_fixup_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_div_fixup_f16 v0, v0, v1, v2
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: zext_fptrunc_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: zext_fptrunc_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: zext_fptrunc_fma_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: zext_fptrunc_fma_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ptrmask_global_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_ptrmask_global_variable_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_ptrmask_global_variable_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%masked = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %ptr, i32 %mask)
; GFX10-LABEL: v_ptrmask_global_variable_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_ptrmask_global_variable_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask)
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask)
; GFX10-LABEL: v_ptrmask_local_variable_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ptrmask_local_variable_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT: v_and_b32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i32 %y, 1
; GFX10-LABEL: v_mul_add_1_i32_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i32 %y, 1
; GFX10-LABEL: v_mul_add_x_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i32 %x, %y
; GFX10-LABEL: v_mul_sub_1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_1_i32_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_x_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_2_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, 2, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_2_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, -2, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_65_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x41, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_65_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i24_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i24 %y, 1
; GFX10-LABEL: v_mul_sub_1_i24_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_add_mul_i24_zext_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i24 %x, %y
; GFX10-LABEL: v_mul_add_1_i24_sext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i24 %y, 1
; GFX10-LABEL: v_add_mul_i24_sext_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i24 %x, %y
; GFX10-LABEL: v_mul_sub_1_i24_sext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i25_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i25 %y, 1
; GFX10-LABEL: v_mul_sub_1_i25_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i25_sext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i25 %y, 1
; GFX10-LABEL: v_mul_sub_1_i25_sext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i16 %y, 1
; GFX10-LABEL: v_mul_add_1_i16_zext_result:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i16 %y, 1
; GFX10-LABEL: v_mul_add_x_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i16 %x, %y
; GFX10-LABEL: v_mul_sub_1_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v1, v1, -1
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_1_i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v1, v1, -1
; GFX10-NEXT: v_mul_lo_u16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_x_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u16 v1, v0, v1
; GFX10-NEXT: v_sub_nc_u16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_2_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v1, v1, 2
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_2_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v1, v1, -2
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
; GFX10-LABEL: v_mul_add_1_i64_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
; GFX10-LABEL: v_mul_add_x_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
; GFX10-LABEL: v_mul_sub_1_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
; GFX10-LABEL: v_mul_sub_1_i64_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -1
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
; GFX10-NEXT: v_mul_lo_u32 v4, v2, v1
; GFX10-LABEL: v_mul_sub_x_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
; GFX10-NEXT: v_mul_lo_u32 v5, v0, v3
; GFX10-NEXT: v_mad_u64_u32 v[2:3], null, v0, v2, 0
; GFX10-LABEL: v_mul_add_2_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
; GFX10-LABEL: v_mul_sub_2_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, -2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
; GFX10-NEXT: v_mul_lo_u32 v4, v1, v2
; GFX10-LABEL: v_mul_add_1_i32_multiple:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v3, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1]
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, v3, v[2:3]
; GFX10-LABEL: v_mul_add_1_i32_other_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_i32_chain:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT: v_mul_lo_u32 v1, v2, v1
; GFX10-NEXT: v_add_nc_u32_e32 v2, v1, v2
; GFX10-LABEL: v_mul_add_1_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_v2i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_x_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_1_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_1_v2i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_x_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
; GFX10-NEXT: v_pk_sub_i16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_2_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_2_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
; GFX10-NEXT: v_mov_b32_e32 v0, v4
; GFX10-LABEL: v_mul_add_1_v2i32_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
; GFX10-NEXT: v_mov_b32_e32 v0, v4
; GFX10-LABEL: v_mul_add_x_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, v[0:1]
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2]
; GFX10-NEXT: v_mov_b32_e32 v0, v4
; GFX10-LABEL: v_mul_sub_1_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
; GFX10-LABEL: v_mul_sub_1_v2i32_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0
; GFX10-LABEL: v_mul_sub_x_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_lo_u32 v2, v0, v2
; GFX10-NEXT: v_mul_lo_u32 v3, v1, v3
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0
; GFX10-LABEL: v_mul_add_2_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
; GFX10-LABEL: v_mul_sub_2_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2
; GFX10-LABEL: v_mul_add_1_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_1_v2i24_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_add_x_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0
; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_sub_1_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
; GFX10-LABEL: v_mul_sub_1_v2i24_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0
; GFX10-LABEL: v_mul_sub_x_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_u32_u24_e32 v2, v0, v2
; GFX10-NEXT: v_mul_u32_u24_e32 v3, v1, v3
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0
; GFX10-LABEL: v_mul_add_2_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, 2, v3
; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
; GFX10-LABEL: v_mul_sub_2_v2i24:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e32 v2, -2, v2
; GFX10-NEXT: v_add_nc_u32_e32 v3, -2, v3
; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2
; GFX10-LABEL: v_mul_9_add_52_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i32 %arg, 9
; GFX10-LABEL: v_mul_9_add_52_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, 9, 52
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i16 %arg, 9
; GFX10-LABEL: v_mul_9_add_52_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 9 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_add_u16 v0, v0, 52 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_9_add_52_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 9, v[1:2]
; GFX10-LABEL: v_mul_5_add_1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i32 %arg, 5
; GFX10-LABEL: v_mul_284_add_82_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_movk_i32 s4, 0x11c
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_5_add_1_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, 5, 1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i16 %arg, 5
; GFX10-LABEL: v_mul_284_add_82_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_movk_i32 s4, 0x11c
; GFX10-NEXT: v_mad_u16 v0, v0, s4, 0x52
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_5_add_1_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_284_add_82_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_lo_u16 v0, 0x11c, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_add_u16 v0, 0x52, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_mul_5_add_1_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 5, v[1:2]
; GFX10-LABEL: v_mul_284_add_82_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[4:5], 0x52
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, 0x11c, v0, s[4:5]
; GFX10-LABEL: v_mul_934584645_add_8234599_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[4:5], 0x7da667
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, 0x37b4a145, v0, s[4:5]
; GFX10-LABEL: v_mul_add_1_i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i8 %y, 1
; GFX10-LABEL: v_mul_add_1_i8_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i8 %y, 1
; GFX10-LABEL: v_mul_add_1_i8_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i8 %y, 1
; GFX10-LABEL: v_mul_add_1_i8_zext_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add i8 %y, 1
; GFX10-LABEL: v_mul_add_1_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1
; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0
; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1
; GFX10-LABEL: v_mul_add_1_v2i8_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mad_u16 v1, v1, v3, v1
; GFX10-NEXT: v_mad_u16 v0, v0, v2, v0
; GFX10-NEXT: v_lshlrev_b16 v2, 8, v1
; GFX10PLUS-LABEL: v_roundeven_f32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f32:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%roundeven = call float @llvm.roundeven.f32(float %x)
; GFX10PLUS-LABEL: v_roundeven_v2f32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f32:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_roundeven_v3f32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
; SDAG_GFX10PLUS-LABEL: v_roundeven_v3f32:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
; GFX10PLUS-LABEL: v_roundeven_v4f32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
; SDAG_GFX10PLUS-LABEL: v_roundeven_v4f32:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
; GFX10PLUS-LABEL: v_roundeven_f16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f16:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%roundeven = call half @llvm.roundeven.f16(half %x)
; GFX10-LABEL: v_roundeven_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rndne_f16_e32 v1, v0
; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-LABEL: v_roundeven_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_rndne_f16_e32 v0, v0
; GFX11-NEXT: v_rndne_f16_e32 v1, v1
; SDAG_GFX10-LABEL: v_roundeven_v2f16:
; SDAG_GFX10: ; %bb.0:
; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0
; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
; SDAG_GFX11-LABEL: v_roundeven_v2f16:
; SDAG_GFX11: ; %bb.0:
; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SDAG_GFX11-NEXT: v_rndne_f16_e32 v0, v0
; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1
; GFX10-LABEL: v_roundeven_v2f16_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT: v_rndne_f16_e32 v1, v0
; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX11-LABEL: v_roundeven_v2f16_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_rndne_f16_e32 v0, v0
; SDAG_GFX10-LABEL: v_roundeven_v2f16_fneg:
; SDAG_GFX10: ; %bb.0:
; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX10-NEXT: v_rndne_f16_e64 v0, -v0
; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
; SDAG_GFX11-LABEL: v_roundeven_v2f16_fneg:
; SDAG_GFX11: ; %bb.0:
; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SDAG_GFX11-NEXT: v_rndne_f16_e64 v0, -v0
; SDAG_GFX11-NEXT: v_rndne_f16_e64 v1, -v1
; GFX10-LABEL: v_roundeven_v4f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rndne_f16_e32 v2, v0
; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_rndne_f16_e32 v3, v1
; GFX11-LABEL: v_roundeven_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_rndne_f16_e32 v0, v0
; SDAG_GFX10-LABEL: v_roundeven_v4f16:
; SDAG_GFX10: ; %bb.0:
; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0
; SDAG_GFX11-LABEL: v_roundeven_v4f16:
; SDAG_GFX11: ; %bb.0:
; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1
; GFX10PLUS-LABEL: v_roundeven_f32_fabs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fabs:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0|
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10PLUS-LABEL: v_roundeven_f32_fneg:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fneg:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
; GFX10PLUS-LABEL: v_roundeven_f64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f64:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%roundeven = call double @llvm.roundeven.f64(double %x)
; GFX10PLUS-LABEL: v_roundeven_f64_fneg:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f64_fneg:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg double %x
; GFX10PLUS-LABEL: v_roundeven_v2f64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f64:
; SDAG_GFX10PLUS: ; %bb.0:
; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_saddsat_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_saddsat_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
; GFX10PLUS-LABEL: v_saddsat_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs)
; GFX10PLUS-LABEL: v_saddsat_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10PLUS-LABEL: v_saddsat_v3i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_saddsat_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_i16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_add_i16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_saddsat_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_nc_i32 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_add_nc_i32 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_saddsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3]
; GFX11-LABEL: v_saddsat_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: v_cmp_gt_i64_e64 s0, 0, v[2:3]
; CHECK-LABEL: xori64i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: selecti64i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v1
; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: selecti64i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: selecti32i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_xor_b32_e32 v0, 0x7fffffff, v1
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v1
; CHECK-LABEL: xori32i8:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; CHECK-NEXT: v_xor_b32_e32 v0, 0x54, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: selecti32i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; CHECK-NEXT: v_xor_b32_e32 v0, 0x54, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: selecti32i8:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; CHECK-NEXT: v_xor_b32_e32 v0, 0x54, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: selecti8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 8
; CHECK-NEXT: v_mov_b32_e32 v1, 0x54
; CHECK-NEXT: v_ashrrev_i16 v0, 7, v0
; CHECK-LABEL: icmpasreq:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: icmpasrne:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, -1, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: oneusecmp:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v0
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
; GFX11-LABEL: add_select_fabs_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: v_add_f16_e64 v1, |v1|, v3
; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: v_add_f16_e64 v1, |v2|, v4
; GFX11-LABEL: add_select_fabs_var_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fabs_negk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fabs_negk_negk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_posk_posk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0x4000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negk_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negliteralk_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fabs_posk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_posk_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: v_sub_f16_e32 v1, v4, v1
; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11-NEXT: v_sub_f16_e32 v1, v4, v2
; GFX11-LABEL: add_select_fneg_var_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_negk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_inv2pi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xb118, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_neginv2pi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3118, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_negk_negk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negliteralk_negliteralk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0xe800
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_negk_negk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negk_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_posk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_posk_fneg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-LABEL: add_select_negfabs_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_fabs_negfabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-NEXT: v_or_b32_e32 v2, 0x8000, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_neg_fabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_fabs_neg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_neg_negfabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negfabs_neg_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: mul_select_negfabs_posk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: mul_select_posk_negfabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: mul_select_negfabs_negk_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: mul_select_negk_negfabs_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-LABEL: select_fneg_posk_src_add_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_add_f16_e32 v1, 4.0, v1
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_add_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e32 v1, -4.0, v1
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_add_f16_e32 v1, -4.0, v1
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_sub_f16_e32 v1, 4.0, v1
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: select_fneg_posk_src_mul_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v1
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, 4.0, v1
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, 4.0, v1
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-LABEL: add_select_fabs_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_fabs_var_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-LABEL: add_select_fabs_negk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: add_select_fabs_negk_negk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_posk_posk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, 0x4000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negk_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: add_select_negliteralk_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: add_select_fabs_posk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: add_select_posk_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: add_select_fneg_fneg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX11-LABEL: add_select_fneg_var_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-LABEL: add_select_fneg_negk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_inv2pi_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_neginv2pi_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negk_negk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negliteralk_negliteralk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, 0xe800
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_negk_negk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_mov_b32_e32 v3, 0xc000
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negk_fneg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_fneg_posk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_posk_fneg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_negfabs_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_fabs_negfabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v3, 0x80008000, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_neg_fabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-LABEL: add_select_fabs_neg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-LABEL: add_select_neg_negfabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-LABEL: add_select_negfabs_neg_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
; GFX11-LABEL: mul_select_negfabs_posk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: mul_select_posk_negfabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: mul_select_negfabs_negk_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: mul_select_negk_negfabs_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_or_b32_e32 v2, 0x80008000, v2
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-LABEL: select_fneg_posk_src_mul_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SAFE-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NSZ-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; CHECK-LABEL: f:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
}
define void @skip_mode_switch(i32 %arg) {
-; SI-LABEL: skip_mode_switch:
-; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SI-NEXT: s_cbranch_execz .LBB16_2
-; SI-NEXT: ; %bb.1: ; %bb.0
-; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
-; SI-NEXT: .LBB16_2: ; %bb.1
-; SI-NEXT: s_or_b64 exec, exec, s[4:5]
-; SI-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-WAVE64-LABEL: skip_mode_switch:
-; GFX10-WAVE64: ; %bb.0: ; %entry
-; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2
-; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0
-; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
-; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1
-; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31]
+; WAVE64-LABEL: skip_mode_switch:
+; WAVE64: ; %bb.0: ; %entry
+; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; WAVE64-NEXT: s_cbranch_execz .LBB16_2
+; WAVE64-NEXT: ; %bb.1: ; %bb.0
+; WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
+; WAVE64-NEXT: .LBB16_2: ; %bb.1
+; WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
+; WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE32-LABEL: skip_mode_switch:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
; GFX11-LABEL: skip_mode_switch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB16_2
; GFX10PLUS-LABEL: v_ssubsat_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_ssubsat_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
; GFX10PLUS-LABEL: v_ssubsat_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
; GFX10PLUS-LABEL: v_ssubsat_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10PLUS-LABEL: v_ssubsat_v3i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ssubsat_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ssubsat_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_ssubsat_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_ssubsat_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_ssubsat_v8i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v8 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v9 clamp
; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v10 clamp
; GFX10-LABEL: v_ssubsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
; GFX11-LABEL: v_ssubsat_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
; GFX10-LABEL: v_ssubsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
; GFX11-LABEL: v_ssubsat_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3]
; GFX10-LABEL: local_store_i56:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_write_b8_d16_hi v0, v2 offset:6
; GFX10-NEXT: ds_write_b16 v0, v2 offset:4
; GFX10-NEXT: ds_write_b32 v0, v1
; GFX11-LABEL: local_store_i56:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_store_b8_d16_hi v0, v2 offset:6
; GFX11-NEXT: ds_store_b16 v0, v2 offset:4
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX10-LABEL: local_store_i13:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 0x1fff, v1
; GFX10-NEXT: ds_write_b16 v0, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-LABEL: local_store_i13:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0x1fff, v1
; GFX11-NEXT: ds_store_b16 v0, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-LABEL: local_store_i17:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v2, 0x1ffff, v1
; GFX10-NEXT: ds_write_b16 v0, v1
; GFX10-NEXT: ds_write_b8_d16_hi v0, v2 offset:2
; GFX11-LABEL: local_store_i17:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v2, 0x1ffff, v1
; GFX11-NEXT: ds_store_b16 v0, v1
; GFX11-NEXT: ds_store_b8_d16_hi v0, v2 offset:2
; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10PLUS-LABEL: v_constained_fadd_v2f16_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fadd_v2f16_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fadd_v2f16_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10PLUS-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_add_f16 v0, v0, v2
; GFX10PLUS-NEXT: v_add_f16_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-NEXT: v_add_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-NEXT: v_add_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-NEXT: v_add_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f32_e32 v0, v0, v3
; GFX10-NEXT: v_add_f32_e32 v1, v1, v4
; GFX10-NEXT: v_add_f32_e32 v2, v2, v5
; GFX11-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_add_f32 v0, v0, v3 :: v_dual_add_f32 v1, v1, v4
; GFX11-NEXT: v_add_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_add_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v1, |v0|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX11-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX11-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg half %z
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, -v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, -v0, -v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg half %x
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_fma_f16 v0, |v0|, |v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = call half @llvm.fabs.f16(half %x)
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg <2 x half> %x
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v3, v6
; GFX10-NEXT: v_fma_f32 v1, v1, v4, v7
; GFX10-NEXT: v_fma_f32 v2, v2, v5, v8
; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v4, v8
; GFX10-NEXT: v_fma_f32 v1, v1, v5, v9
; GFX10-NEXT: v_fma_f32 v2, v2, v6, v10
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg float %z
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, -v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.x = call float @llvm.fabs.f32(float %x)
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, -v0, -v2, v4
; GFX10-NEXT: v_fma_f32 v1, -v1, -v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fma_v3f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
; GFX10-LABEL: v_constained_fma_v4f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg double %z
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], -v[2:3], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg double %x
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.x = call double @llvm.fabs.f64(double %x)
; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], -v[4:5], v[8:9]
; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], -v[6:7], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-SDAG-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX10-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX11-SDAG-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX1-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX1-GISEL: ; %bb.0:
; GFX1-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX1-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX1-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v2
; GFX10-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX1-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX1-GISEL: ; %bb.0:
; GFX1-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1-GISEL-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX1-GISEL-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX1-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v4
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v5
; GFX11-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mul_f32 v0, v0, v3 :: v_dual_mul_f32 v1, v1, v4
; GFX11-NEXT: v_mul_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10-LABEL: v_constained_fmul_f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fmul_f64_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10-LABEL: v_constained_fmul_f64_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fmul_v2f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fmul_v2f64_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fmul_v2f64_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fmul_v3f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11]
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Strict-FP extension of a scalar half to float: calls
; llvm.experimental.constrained.fpext.f32.f16 with "fpexcept.strict" and
; returns the result. Every check body below expects a single
; v_cvt_f32_f16_e32 between the entry s_waitcnt and the s_setpc_b64 return.
define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
-; GFX89-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
ret float %result
}
; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v2
}
; Strict-FP extension of a scalar float to double: calls
; llvm.experimental.constrained.fpext.f64.f32 with "fpexcept.strict" and
; returns the result. Every check body below expects a single
; v_cvt_f64_f32_e32 writing the v[0:1] pair from v0.
define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
-; GFX89-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
ret double %result
}
; Strict-FP extension of a <2 x float> vector to <2 x double>: calls
; llvm.experimental.constrained.fpext.v2f64.v2f32 with "fpexcept.strict".
; The expected code copies v1 into v2 before the first conversion writes
; v[0:1], then converts each element with its own v_cvt_f64_f32_e32.
define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 {
-; GFX89-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_mov_b32_e32 v2, v1
-; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_mov_b32_e32 v2, v1
-; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX1011-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, v1
+; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GCN-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict")
ret <2 x double> %result
}
; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1011-NEXT: v_mov_b32_e32 v4, v2
; GFX1011-NEXT: v_mov_b32_e32 v2, v1
; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
}
; Strict-FP extension of a scalar half directly to double: calls
; llvm.experimental.constrained.fpext.f64.f16 with "fpexcept.strict".
; The expected code performs the extension in two steps, v_cvt_f32_f16_e32
; followed by v_cvt_f64_f32_e32 on the intermediate value in v0.
define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
-; GFX89-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict")
ret double %result
}
; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v1
; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v1
; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
}
; Strict-FP extension of a half to float whose RESULT is then negated with
; fneg. The expected code keeps the negation as a separate instruction: the
; v_cvt_f32_f16_e32 is followed by a v_xor_b32_e32 of v0 with 0x80000000
; instead of a modifier on the conversion.
define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
-; GFX89-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX89-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1011-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
%neg.result = fneg float %result
ret float %neg.result
}
; Strict-FP extension of a half to float whose INPUT is negated with fneg
; before the constrained fpext call. The expected code folds the negation
; into the conversion: a single v_cvt_f32_f16_e64 taking -v0 as its source.
define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 {
-; GFX89-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%neg.arg = fneg half %arg
%result = call float @llvm.experimental.constrained.fpext.f32.f16(half %neg.arg, metadata !"fpexcept.strict")
ret float %result
}
; Strict-FP extension of a float to double; by its name this is the
; float-to-double counterpart of the fneg-on-input half-to-float test.
;
; NOTE(review): %neg.arg is computed below but never used; the constrained
; fpext call takes %arg. As written the fneg is dead, and the check bodies
; accordingly show a plain v_cvt_f64_f32_e32 of v0 with no negation, unlike
; the half-to-float variant, which passes %neg.arg and expects a -v0 source.
; This looks unintended. If the call is switched to %neg.arg, the check lines
; must be regenerated with the test-update script rather than edited by hand.
define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 {
-; GFX89-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%neg.arg = fneg float %arg
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
ret double %result
}
; Strict-FP extension of a float to double whose RESULT is then negated with
; fneg. The expected code converts with v_cvt_f64_f32_e32 into v[0:1] and
; then applies v_xor_b32_e32 with 0x80000000 to v1, the second register of
; that result pair, as a separate instruction.
define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
-; GFX89-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX89-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX1011-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
%neg.result = fneg double %result
ret double %neg.result
; Attribute group #0: the strictfp attribute carried by the test functions
; defined with "#0" in this file.
attributes #0 = { strictfp }
; Attribute group #1: not referenced by any line visible in this excerpt;
; presumably attached to the intrinsic declarations (TODO confirm against the
; full file).
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Strict-FP truncation of a scalar float to half: calls
; llvm.experimental.constrained.fptrunc.f16.f32 with "round.tonearest" and
; "fpexcept.strict" and returns the result. Every check body below expects a
; single v_cvt_f16_f32_e32 between the entry s_waitcnt and the return.
define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
-; GFX89-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
}
; GFX1011-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX1011-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
}
define float @v_constrained_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
-; GFX89-LABEL: v_constrained_fptrunc_f64_to_f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fptrunc_f64_to_f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fptrunc_f64_to_f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define <2 x float> @v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict(<2 x double> %arg) #0 {
-; GFX89-LABEL: v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX89-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX1011-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GCN-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x float> %val
}
define <3 x float> @v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict(<3 x double> %arg) #0 {
-; GFX89-LABEL: v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX89-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
-; GFX89-NEXT: v_cvt_f32_f64_e32 v2, v[4:5]
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX1011-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
-; GFX1011-NEXT: v_cvt_f32_f64_e32 v2, v[4:5]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GCN-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
+; GCN-NEXT: v_cvt_f32_f64_e32 v2, v[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x float> %val
}
; }
define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
-; GFX89-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX89-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
%neg.val = fneg half %val
ret half %neg.val
}
define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) #0 {
-; GFX89-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%neg.arg = fneg float %arg
%val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
}
define float @v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
-; GFX89-LABEL: v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX89-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX1011-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
%neg.val = fneg float %val
ret float %neg.val
}
define float @v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict(double %arg) #0 {
-; GFX89-LABEL: v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict:
-; GFX89: ; %bb.0:
-; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_cvt_f32_f64_e64 v0, -v[0:1]
-; GFX89-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1011-NEXT: v_cvt_f32_f64_e64 v0, -v[0:1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f64_e64 v0, -v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
%neg.arg = fneg double %arg
%val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
attributes #0 = { strictfp }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
; GFX10: {{.*}}
; GFX11: {{.*}}
+; GFX89: {{.*}}
; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX10PLUS-SDAG: ; %bb.0:
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX10PLUS-GISEL: ; %bb.0:
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX10PLUS-SDAG: ; %bb.0:
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX10PLUS-GISEL: ; %bb.0:
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX10PLUS-SDAG: ; %bb.0:
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX10PLUS-GISEL: ; %bb.0:
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-GISEL-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-SDAG-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX10-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10PLUS-SDAG: ; %bb.0:
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX10PLUS-SDAG-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10PLUS-GISEL: ; %bb.0:
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
; GFX10-GISEL-NEXT: v_sub_f16_e32 v5, v1, v3
; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX10PLUS-SDAG: ; %bb.0:
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX10PLUS-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX10PLUS-GISEL: ; %bb.0:
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX10PLUS-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v3
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v4
; GFX10-NEXT: v_sub_f32_e32 v2, v2, v5
; GFX11-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
; GFX11-NEXT: v_sub_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[6:7]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[8:9]
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], -v[10:11]
; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff
; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v3, v0
; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff
; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v3, v0
; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-NEXT: v_med3_i32 v0, v4, s0, 0x7fff
; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v4, v0
; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_med3_i32 v0, v6, s0, 0x7fff
; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v3
; GFX11-LABEL: test_ldexp_f32_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.experimental.constrained.ldexp.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; GFX11-LABEL: test_ldexp_v2f32_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v4
; GFX11-NEXT: v_ldexp_f32 v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: test_ldexp_v3f32_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v5
; GFX11-NEXT: v_ldexp_f32 v1, v3, v6
; GFX11-NEXT: v_ldexp_f32 v2, v4, v7
; GFX11-LABEL: test_ldexp_v4f32_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_ldexp_f32 v0, v2, v6
; GFX11-NEXT: v_ldexp_f32 v1, v3, v7
; GFX11-NEXT: v_ldexp_f32 v2, v4, v8
; }
define double @test_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #0 {
-; GFX6-LABEL: test_ldexp_f64_i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: test_ldexp_f64_i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_ldexp_f64_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_ldexp_f64_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: test_ldexp_f64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v4
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.experimental.constrained.ldexp.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret double %result
}
; }
define <2 x double> @test_ldexp_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %a, <2 x i32> %b) #0 {
-; GFX6-LABEL: test_ldexp_v2f64_v2i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6
-; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: test_ldexp_v2f64_v2i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6
-; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_ldexp_v2f64_v2i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6
-; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_ldexp_v2f64_v2i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6
-; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: test_ldexp_v2f64_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v6
+; GCN-NEXT: v_ldexp_f64 v[2:3], v[4:5], v7
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i32(<2 x double> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x double> %result
}
define <3 x double> @test_ldexp_v3f64_v3i32(ptr addrspace(1) %out, <3 x double> %a, <3 x i32> %b) #0 {
-; GFX6-LABEL: test_ldexp_v3f64_v3i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8
-; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9
-; GFX6-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: test_ldexp_v3f64_v3i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8
-; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9
-; GFX8-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_ldexp_v3f64_v3i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8
-; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9
-; GFX9-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_ldexp_v3f64_v3i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8
-; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9
-; GFX11-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: test_ldexp_v3f64_v3i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v8
+; GCN-NEXT: v_ldexp_f64 v[2:3], v[4:5], v9
+; GCN-NEXT: v_ldexp_f64 v[4:5], v[6:7], v10
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x double> @llvm.experimental.constrained.ldexp.v3f64.v3i32(<3 x double> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <3 x double> %result
}
define <4 x double> @test_ldexp_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %a, <4 x i32> %b) #0 {
-; GFX6-LABEL: test_ldexp_v4f64_v4i32:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10
-; GFX6-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11
-; GFX6-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12
-; GFX6-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: test_ldexp_v4f64_v4i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10
-; GFX8-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11
-; GFX8-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12
-; GFX8-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_ldexp_v4f64_v4i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10
-; GFX9-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11
-; GFX9-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12
-; GFX9-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_ldexp_v4f64_v4i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10
-; GFX11-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11
-; GFX11-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12
-; GFX11-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: test_ldexp_v4f64_v4i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_ldexp_f64 v[0:1], v[2:3], v10
+; GCN-NEXT: v_ldexp_f64 v[2:3], v[4:5], v11
+; GCN-NEXT: v_ldexp_f64 v[4:5], v[6:7], v12
+; GCN-NEXT: v_ldexp_f64 v[6:7], v[8:9], v13
+; GCN-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x double> @llvm.experimental.constrained.ldexp.v4f64.v4i32(<4 x double> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %result
}
attributes #0 = { strictfp }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
+; GFX11: {{.*}}
; GFX11-GISEL: {{.*}}
; GFX11-SDAG: {{.*}}
+; GFX6: {{.*}}
; GFX6-GISEL: {{.*}}
; GFX6-SDAG: {{.*}}
+; GFX8: {{.*}}
; GFX8-GISEL: {{.*}}
; GFX8-SDAG: {{.*}}
+; GFX9: {{.*}}
; GFX9-GISEL: {{.*}}
; GFX9-SDAG: {{.*}}
; GFX10-LABEL: v_uaddsat_i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
; GFX10-LABEL: v_uaddsat_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
; GFX10-LABEL: v_uaddsat_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
; GFX10-LABEL: v_uaddsat_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10-LABEL: v_uaddsat_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT: v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_uaddsat_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT: v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_uaddsat_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v2 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_uaddsat_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v3 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v4 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v5 clamp
; GFX10-LABEL: v_uaddsat_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v4 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v5 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v6 clamp
; GFX10-LABEL: v_uaddsat_v8i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v8 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v9 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, v10 clamp
; GFX10-LABEL: v_uaddsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_add_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT: v_add_nc_u32_e64 v1, v1, v17 clamp
; GFX10-LABEL: v_uaddsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
; GFX1030-LABEL: v_test_udiv64_mulhi_fold:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-NEXT: s_mov_b32 s4, 0x346d900
; GFX1030-NEXT: s_add_u32 s4, 0x4237, s4
; GFX1030-NEXT: s_addc_u32 s5, 0, 0
; GFX10PLUS-LABEL: v_usubsat_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp
; GFX10PLUS-LABEL: v_usubsat_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs)
; GFX10PLUS-LABEL: usubsat_as_bithack_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, 0x8000 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%signsplat = ashr i16 %x, 15
; GFX10PLUS-LABEL: usubsat_as_bithack2_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, 0x8000 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%signsplat = ashr i16 %x, 15
; GFX10PLUS-LABEL: usubsat_as_bithack_commute_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, v0, 0x8000 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%signsplat = ashr i16 %x, 15
; GFX10PLUS-LABEL: v_usubsat_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs)
; GFX10PLUS-LABEL: v_usubsat_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v1 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
; GFX10PLUS-LABEL: v_usubsat_v3i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_usubsat_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_pk_sub_u16 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_pk_sub_u16 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_usubsat_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v2 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v3 clamp
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_usubsat_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v3 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v5 clamp
; GFX10PLUS-LABEL: v_usubsat_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v4 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v5 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v6 clamp
; GFX10PLUS-LABEL: v_usubsat_v8i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v0, v0, v8 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v1, v1, v9 clamp
; GFX10PLUS-NEXT: v_sub_nc_u32_e64 v2, v2, v10 clamp
; GFX10-LABEL: v_usubsat_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp
; GFX11-LABEL: v_usubsat_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_sub_nc_u32_e64 v0, v0, v16 clamp
; GFX11-NEXT: v_sub_nc_u32_e64 v1, v1, v17 clamp
; GFX10PLUS-LABEL: v_usubsat_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
; GFX10PLUS-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
; GFX10PLUS-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
; SDAG-GFX11-LABEL: basic_smax_smin:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-LABEL: basic_smax_smin:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX11-LABEL: basic_smin_smax:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-LABEL: basic_smin_smax:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX11-LABEL: basic_smin_smax_combined:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-LABEL: basic_smin_smax_combined:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-LABEL: vec_smax_smin:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_max_i16 v0, v0, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
; GFX11-LABEL: vec_smin_smax:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_i16 v0, v0, 0
; GFX10-LABEL: shuffle_v4f16_23uu:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: shuffle_v4f16_23uu:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: shuffle_v4f16_234u:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4
; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_234u:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: global_load_b64 v[1:2], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_u1u3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: shuffle_v4f16_u1u3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: shuffle_v4f16_u3u1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[1:2], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX11-LABEL: shuffle_v4f16_u3u1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[1:2], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: shuffle_v4f16_u3uu:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: shuffle_v4f16_u3uu:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: shuffle_v4f16_3u6u:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_3u6u:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_3uu7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_3uu7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_35u5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4
; GFX10-NEXT: global_load_dword v4, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: shuffle_v4f16_35u5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_357u:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_357u:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_0101:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX11-LABEL: shuffle_v4f16_0101:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX10-LABEL: shuffle_v4f16_0123:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: shuffle_v4f16_0123:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: shuffle_v4f16_0145:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_0145:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_0167:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_0167:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_2301:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[1:2], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX11-LABEL: shuffle_v4f16_2301:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[1:2], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: shuffle_v4f16_2323:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX11-LABEL: shuffle_v4f16_2323:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX10-LABEL: shuffle_v4f16_2345:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_2345:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_2367:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4
; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_2367:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_4501:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[2:3], off
; GFX10-NEXT: global_load_dword v5, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_4501:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v2, v[2:3], off
; GFX11-NEXT: global_load_b32 v1, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_4523:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[2:3], off
; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_4523:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v2, v[2:3], off
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_4545:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX11-LABEL: shuffle_v4f16_4545:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX10-LABEL: shuffle_v4f16_4567:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: shuffle_v4f16_4567:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: shuffle_v4f16_6701:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4
; GFX10-NEXT: global_load_dword v5, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_6701:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v2, v[2:3], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_6723:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4
; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_6723:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v2, v[2:3], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_6745:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX11-LABEL: shuffle_v4f16_6745:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[1:2], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX10-LABEL: shuffle_v4f16_6767:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[2:3], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX11-LABEL: shuffle_v4f16_6767:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[2:3], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX10-LABEL: shuffle_v4f16_2356:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off
; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_2356:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_5623:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off
; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_5623:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_3456:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_3456:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_5634:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_5634:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4f16_5734:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4f16_5734:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4i16_2356:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off
; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4i16_2356:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v4i16_0167:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v4i16_0167:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_0000:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040100
; GFX11-LABEL: shuffle_v4f16_0000:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v0, v0, v0, 0x5040100
; GFX10-LABEL: shuffle_v4f16_1010:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX11-LABEL: shuffle_v4f16_1010:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX10-LABEL: shuffle_v4f16_1100:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[1:2], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v0, v1, v1, 0x7060302
; GFX11-LABEL: shuffle_v4f16_1100:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[1:2], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v0, v1, v1, 0x7060302
; GFX10-LABEL: shuffle_v4f16_6161:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: shuffle_v4f16_6161:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v4f16_2333:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v1, v0, v0, 0x7060302
; GFX11-LABEL: shuffle_v4f16_2333:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, v0, v0, 0x7060302
; GFX10-LABEL: shuffle_v4f16_6667:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v1, v0, v0, 0x7060302
; GFX11-LABEL: shuffle_v4f16_6667:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, v0, v0, 0x7060302
; GFX10-LABEL: shuffle_v8f16_0101:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX11-LABEL: shuffle_v8f16_0101:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX10-LABEL: shuffle_v8f16_0123:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-LABEL: shuffle_v8f16_0123:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: shuffle_v8f16_4589:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:8
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v8f16_4589:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:8
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: shuffle_v8f16_10_11_2_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4
; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v8f16_10_11_2_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v2, v[2:3], off offset:4
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v8f16_13_14_2_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off offset:8
; GFX10-NEXT: global_load_dword v4, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-LABEL: shuffle_v8f16_13_14_2_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off offset:8
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX10-LABEL: shuffle_v3f16_0122:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_perm_b32 v1, v1, v1, 0x5040100
; GFX11-LABEL: shuffle_v3f16_0122:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, v1, v1, 0x5040100
; GFX10-LABEL: shuffle_v2f16_0122:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX11-LABEL: shuffle_v2f16_0122:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX10-LABEL: shuffle_v6f16_452367:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v6, v1
; GFX10-NEXT: v_mov_b32_e32 v5, v0
; GFX10-NEXT: v_mov_b32_e32 v4, v3
; GFX11-LABEL: shuffle_v6f16_452367:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
; GFX11-NEXT: global_load_b96 v[0:2], v[0:1], off
; GFX11-NEXT: global_load_b32 v3, v[3:4], off
; GFX10-LABEL: shuffle_v4f16_0456:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off
; GFX10-NEXT: ; kill: killed $vgpr0 killed $vgpr1
; GFX11-LABEL: shuffle_v4f16_0456:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: global_load_b64 v[1:2], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: low16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: low16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: hi16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: hi16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: low16hi16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: low16hi16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: hi16low16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: hi16low16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: i16_low16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: i16_low16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: i16_low16hi16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: i16_low16hi16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: i16_hi16low16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: i16_hi16low16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: i16_hi16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v4, v[0:1], off
; GFX10-NEXT: global_load_dword v5, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-LABEL: i16_hi16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX10-LABEL: v2i16_hi16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX11-LABEL: v2i16_hi16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX10-LABEL: v2half_hi16bits:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX11-LABEL: v2half_hi16bits:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX10-LABEL: shuffle_v8f16_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off
; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v8f16_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v16f16_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off
; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v16f16_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <8 x half>, ptr addrspace(1) %arg0
%val1 = load <8 x half>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v32f16_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off
; GFX10-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[14:17], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v32f16_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off
; GFX11-NEXT: global_load_b128 v[10:13], v[2:3], off offset:16
; GFX11-NEXT: global_store_b128 v[4:5], v[14:17], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <16 x half>, ptr addrspace(1) %arg0
%val1 = load <16 x half>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v8i16_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off
; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v8i16_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x i16>, ptr addrspace(1) %arg0
%val1 = load <4 x i16>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v16i16_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off
; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v16i16_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <8 x i16>, ptr addrspace(1) %arg0
%val1 = load <8 x i16>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v32i16_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off
; GFX10-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[14:17], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v32i16_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off
; GFX11-NEXT: global_load_b128 v[10:13], v[2:3], off offset:16
; GFX11-NEXT: global_store_b128 v[4:5], v[14:17], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <16 x i16>, ptr addrspace(1) %arg0
%val1 = load <16 x i16>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v4i8_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
; GFX10-NEXT: global_load_short_d16_hi v0, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v4i8_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
; GFX11-NEXT: global_load_d16_hi_b16 v0, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[4:5], v0, off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <2 x i8>, ptr addrspace(1) %arg0
%val1 = load <2 x i8>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v8i8_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dword v6, v[0:1], off
; GFX10-NEXT: global_load_dword v7, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v[4:5], v[6:7], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v8i8_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: global_load_b32 v1, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x i8>, ptr addrspace(1) %arg0
%val1 = load <4 x i8>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v16i8_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off
; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v16i8_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <8 x i8>, ptr addrspace(1) %arg0
%val1 = load <8 x i8>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v32i8_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off
; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v32i8_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <16 x i8>, ptr addrspace(1) %arg0
%val1 = load <16 x i8>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v4i32_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx2 v[6:7], v[0:1], off
; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v4i32_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT: global_load_b64 v[2:3], v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <2 x i32>, ptr addrspace(1) %arg0
%val1 = load <2 x i32>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v8i32_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off
; GFX10-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[10:13], off
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v8i32_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x i32>, ptr addrspace(1) %arg0
%val1 = load <4 x i32>, ptr addrspace(1) %arg1
; GFX10-LABEL: shuffle_v16i32_concat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx4 v[6:9], v[2:3], off
; GFX10-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[14:17], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shuffle_v16i32_concat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_load_b128 v[6:9], v[2:3], off
; GFX11-NEXT: global_load_b128 v[10:13], v[2:3], off offset:16
; GFX11-NEXT: global_store_b128 v[4:5], v[14:17], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <8 x i32>, ptr addrspace(1) %arg0
%val1 = load <8 x i32>, ptr addrspace(1) %arg1
; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: v_add_co_u32 v6, vcc_lo, v0, 8
; GCN-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo
; GCN-NEXT: .LBB0_1: ; %bb0
; GCN-NEXT: s_mov_b32 vcc_lo, exec_lo
; GCN-NEXT: s_cbranch_vccnz .LBB0_1
; GCN-NEXT: ; %bb.4: ; %DummyReturnBlock
-; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: vgpr_descriptor_waterfall_loop_idom_update:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB0_1: ; %bb0
; GFX11-NEXT: ; =>This Loop Header: Depth=1
; GFX11-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX11-NEXT: s_cbranch_vccnz .LBB0_1
; GFX11-NEXT: ; %bb.4: ; %DummyReturnBlock
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
br label %bb0
; GFX10-LABEL: non_preserved_vgpr_tuple8:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s4, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s5, -1
; GFX11-LABEL: non_preserved_vgpr_tuple8:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-LABEL: call_preserved_vgpr_tuple8:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s4, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s5, -1
; GFX11-LABEL: call_preserved_vgpr_tuple8:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
bb.0:
; GCN-LABEL: name: waitcnt-check-inorder
; GCN: S_WAITCNT 0
- ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_ENDPGM 0
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-vmem
; GCN: S_WAITCNT 0
- ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-mimg-samp
; GCN: S_WAITCNT 0
- ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-vmem-reverse
; GCN: S_WAITCNT 0
- ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-mimg-samp-reverse
; GCN: S_WAITCNT 0
- ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GFX10: liveins: $vgpr99
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec
; GFX10-NEXT: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec
; GFX10-NEXT: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec
; GFX11: liveins: $vgpr99
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec
; GFX11-NEXT: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec
; GFX11-NEXT: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec
; GFX10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX11-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GFX11-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: S_BARRIER
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: S_BARRIER
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1
; GFX10-NEXT: S_BARRIER
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: S_BARRIER
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX10-NEXT: S_WAITCNT 112
; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX11-NEXT: S_WAITCNT 112
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: S_BARRIER
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: S_BARRIER
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: S_WAITCNT 0
- ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX10-NEXT: S_WAITCNT 0
; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1
; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: S_WAITCNT 0
- ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GFX11-NEXT: S_WAITCNT 0
; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
; GCN-LABEL: store_vscnt_private:
; GCN: {{buffer|scratch}}_store_{{dword|b32}}
; GFX8_9: s_waitcnt vmcnt(0)
-; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
define void @store_vscnt_private(ptr addrspace(5) %p) {
store i32 0, ptr addrspace(5) %p
; GFX8: flat_store_dword
; GFX9PLUS: global_store_{{dword|b32}}
; GFX8_9: s_waitcnt vmcnt(0)
-; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
define void @store_vscnt_global(ptr addrspace(1) %p) {
store i32 0, ptr addrspace(1) %p
; GCN: flat_store_{{dword|b32}}
; GFX8_9: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}}
-; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
define void @store_vscnt_flat(ptr %p) {
store i32 0, ptr %p
; GCN-LABEL: function_prologue:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0){{$}}
-; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
define void @function_prologue() {
ret void
; GFX1032-LABEL: test_mad_i64_i32:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: v_mad_i64_i32 v[0:1], s4, v0, v1, v[2:3]
; GFX1032-NEXT: s_setpc_b64 s[30:31]
;
; GFX1064-LABEL: test_mad_i64_i32:
; GFX1064: ; %bb.0:
; GFX1064-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX1064-NEXT: s_setpc_b64 s[30:31]
%sext0 = sext i32 %arg0 to i64
; GFX1032-LABEL: test_mad_u64_u32:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s4, v0, v1, v[2:3]
; GFX1032-NEXT: s_setpc_b64 s[30:31]
;
; GFX1064-LABEL: test_mad_u64_u32:
; GFX1064: ; %bb.0:
; GFX1064-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
; GFX1064-NEXT: s_setpc_b64 s[30:31]
%sext0 = zext i32 %arg0 to i64
; GFX1032-LABEL: callee_no_stack_with_call:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1032-NEXT: s_mov_b32 s16, s33
; GFX1032-NEXT: s_mov_b32 s33, s32
; GFX1032-NEXT: s_or_saveexec_b32 s17, -1
; GFX1064-LABEL: callee_no_stack_with_call:
; GFX1064: ; %bb.0:
; GFX1064-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1064-NEXT: s_mov_b32 s16, s33
; GFX1064-NEXT: s_mov_b32 s33, s32
; GFX1064-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX10-W32-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v0, s[0:3], 0 idxen
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v0, s[0:3], 0 idxen
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%c.1 = mul i32 %c, %d
; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%c.1 = mul i32 %c, %d
; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0
; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0
; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%cmp = icmp eq i32 %z, 0
; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%cmp = icmp eq i32 %z, 0
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%idx.1 = extractelement <3 x i32> %idx, i32 0
; GFX10-W32-NEXT: s_cbranch_execnz .LBB30_4
; GFX10-W32-NEXT: .LBB30_2: ; %END
; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: s_branch .LBB30_5
; GFX10-W32-NEXT: .LBB30_3: ; %ELSE
; GFX10-W32-NEXT: v_mul_f32_e32 v0, 4.0, v1
; GFX10-W32-NEXT: .LBB30_4: ; %IF
; GFX10-W32-NEXT: v_mul_f32_e32 v0, 0x40400000, v1
; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: s_branch .LBB30_5
; GFX10-W32-NEXT: .LBB30_5:
main_body:
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%cond = icmp eq i32 %y, 0
; GFX10-W32-NEXT: v_add_f32_e32 v2, v9, v13
; GFX10-W32-NEXT: v_mov_b32_e32 v1, v4
; GFX10-W32-NEXT: v_mov_b32_e32 v3, v5
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: s_branch .LBB32_3
; GFX10-W32-NEXT: .LBB32_2:
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-W32-NEXT: ; %bb.1: ; %main_body
; GFX10-W32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: s_branch .LBB33_3
; GFX10-W32-NEXT: .LBB33_2:
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-W32-NEXT: v_mov_b32_e32 v1, v5
; GFX10-W32-NEXT: v_mov_b32_e32 v2, v6
; GFX10-W32-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
entry:
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
; GFX10-W32-NEXT: v_mov_b32_e32 v5, 1.0
; GFX10-W32-NEXT: buffer_store_dword v5, v4, s[0:3], 0 idxen
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%cc = icmp sgt i32 %sel, 0
; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0
; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
%src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
; GFX10-W32-NEXT: buffer_store_dword v0, v1, s[20:23], 0 idxen
; GFX10-W32-NEXT: buffer_load_dword v0, v1, s[20:23], 0 idxen
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %inp, ptr addrspace(8) %res, i32 %idx1, i32 0, i32 0, i32 0)
; GFX10-W32-NEXT: buffer_store_dword v0, v1, s[16:19], 0 idxen
; GFX10-W32-NEXT: buffer_load_dword v0, v1, s[16:19], 0 idxen
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %inp, ptr addrspace(8) %res, i32 %idx0, i32 0, i32 0, i32 0)
; GFX10-W32-NEXT: buffer_store_dword v0, v1, s[16:19], 0 idxen
; GFX10-W32-NEXT: buffer_load_dword v0, v1, s[16:19], 0 idxen
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-W32-NEXT: ; return to shader part epilog
main_body:
call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %inp, ptr addrspace(8) %res, i32 %idx0, i32 0, i32 0, i32 0)