GIComplexOperandMatcher<s64, "selectSMRDBufferSgprImm">,
GIComplexPatternEquiv<SMRDBufferSgprImm>;
+// GlobalISel equivalent of the VOP3PMadMixMods complex pattern. Operands are
+// matched by the selector's "selectVOP3PMadMixMods" routine.
+def gi_vop3_mad_mix_mods :
+ GIComplexOperandMatcher<s64, "selectVOP3PMadMixMods">,
+ GIComplexPatternEquiv<VOP3PMadMixMods>;
+
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
// directly before selecting a glue-less load, so hide this
return true;
}
+/// Try to manually select a G_FMA / G_FMAD as V_FMA_MIX_F32 / V_MAD_MIX_F32.
+///
+/// Selection is attempted only for a scalar 32-bit result on a subtarget that
+/// has the corresponding mix instruction, and only when at least one of the
+/// three sources was matched by selectVOP3PMadMixModsImpl. On success the mix
+/// instruction is built in front of \p I and \p I is erased.
+///
+/// \returns true if \p I was replaced by a mix instruction, false otherwise.
+bool AMDGPUInstructionSelector::selectG_FMA_FMAD(MachineInstr &I) const {
+  assert(I.getOpcode() == AMDGPU::G_FMA || I.getOpcode() == AMDGPU::G_FMAD);
+
+  const bool IsFMA = I.getOpcode() == AMDGPU::G_FMA;
+  const Register Dst = I.getOperand(0).getReg();
+
+  // Only a scalar 32-bit result is handled here.
+  if (MRI->getType(Dst) != LLT::scalar(32))
+    return false;
+
+  // The subtarget must provide the mix form that corresponds to the opcode.
+  if (IsFMA ? !Subtarget->hasFmaMixInsts() : !Subtarget->hasMadMixInsts())
+    return false;
+
+  // Match each source operand in order. Every call resets and then reports
+  // its own "matched" flag.
+  bool MatchedSrc0, MatchedSrc1, MatchedSrc2;
+  const std::pair<Register, unsigned> Op0 =
+      selectVOP3PMadMixModsImpl(I.getOperand(1), MatchedSrc0);
+  const std::pair<Register, unsigned> Op1 =
+      selectVOP3PMadMixModsImpl(I.getOperand(2), MatchedSrc1);
+  const std::pair<Register, unsigned> Op2 =
+      selectVOP3PMadMixModsImpl(I.getOperand(3), MatchedSrc2);
+
+#ifndef NDEBUG
+  // G_FMAD is not expected to reach this point with f32 denormals enabled.
+  // TODO: We can select this with f32 denormals enabled if all the sources are
+  // converted from f16 (in which case fmad isn't legal).
+  const SIMachineFunctionInfo *FuncInfo =
+      I.getMF()->getInfo<SIMachineFunctionInfo>();
+  AMDGPU::SIModeRegisterDefaults Mode = FuncInfo->getMode();
+  assert((IsFMA || !Mode.allFP32Denormals()) &&
+         "fmad selected with denormals enabled");
+#endif
+
+  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an
+  // operand using the conversion from f16.
+  const bool AnySrcMatched = MatchedSrc0 || MatchedSrc1 || MatchedSrc2;
+  if (!AnySrcMatched)
+    return false;
+
+  const unsigned MixOpc =
+      IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32;
+
+  // Operands are emitted as (modifiers, register) for each of the three
+  // sources, followed by three zero immediates.
+  // NOTE(review): the trailing immediates are presumably clamp / op_sel /
+  // op_sel_hi -- confirm against the instruction definition.
+  MachineInstr *MixInst =
+      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(MixOpc), Dst)
+          .addImm(Op0.second)
+          .addReg(Op0.first)
+          .addImm(Op1.second)
+          .addReg(Op1.first)
+          .addImm(Op2.second)
+          .addReg(Op2.first)
+          .addImm(0)
+          .addImm(0)
+          .addImm(0);
+
+  // The generic instruction is removed only once the new instruction's
+  // register operands have been constrained successfully.
+  const bool Constrained =
+      constrainSelectedInstRegOperands(*MixInst, TII, TRI, RBI);
+  if (Constrained)
+    I.eraseFromParent();
+  return Constrained;
+}
+
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
MachineBasicBlock *BB = MI.getParent();
Register DstReg = MI.getOperand(0).getReg();
return selectG_FABS(I);
case TargetOpcode::G_EXTRACT:
return selectG_EXTRACT(I);
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ if (selectG_FMA_FMAD(I))
+ return true;
+ return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_MERGE_VALUES:
case TargetOpcode::G_CONCAT_VECTORS:
return selectG_MERGE_VALUES(I);
[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
+// MachineInstr-returning variant of stripBitCast (the other one works on a
+// MachineOperand). If MI is a G_BITCAST, returns the instruction defining the
+// bitcast's source register, looking through copies; otherwise returns MI
+// itself. At most one G_BITCAST is stripped.
+static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
+  if (MI->getOpcode() != AMDGPU::G_BITCAST)
+    return MI;
+
+  const Register CastSrc = MI->getOperand(1).getReg();
+  return getDefIgnoringCopies(CastSrc, MRI);
+}
+
+// Check whether Inst is really an extract of the high 16 bits of a dword.
+//
+// After an optional G_BITCAST, Inst has to be a G_TRUNC whose source (again
+// looking through copies and one G_BITCAST) is one of:
+//   G_LSHR x, (G_CONSTANT i32 16)
+//   G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
+//
+// Returns the (bitcast-stripped) instruction defining x, or nullptr if Inst
+// does not match either form.
+static MachineInstr *isExtractHiElt(MachineInstr *Inst,
+                                    MachineRegisterInfo &MRI) {
+  MachineInstr *Trunc = stripBitCast(Inst, MRI);
+  if (Trunc->getOpcode() != AMDGPU::G_TRUNC)
+    return nullptr;
+
+  MachineInstr *TruncOp = stripBitCast(
+      getDefIgnoringCopies(Trunc->getOperand(1).getReg(), MRI), MRI);
+
+  switch (TruncOp->getOpcode()) {
+  case AMDGPU::G_LSHR: {
+    // Only a logical shift right by a constant 16 selects the high half.
+    const auto ShiftAmt = getIConstantVRegValWithLookThrough(
+        TruncOp->getOperand(2).getReg(), MRI);
+    if (!ShiftAmt || ShiftAmt->Value.getZExtValue() != 16)
+      return nullptr;
+
+    return stripBitCast(
+        getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI), MRI);
+  }
+  case AMDGPU::G_SHUFFLE_VECTOR: {
+    assert(MRI.getType(TruncOp->getOperand(0).getReg()) ==
+           LLT::fixed_vector(2, 16));
+
+    ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
+    assert(Mask.size() == 2);
+
+    // 1, 0 swaps the low/high 16 bits.
+    // 1, 1 sets the high 16 bits to be the same as the low 16.
+    // Either way element 0 of the result is the high element of the first
+    // shuffle input. Any second mask entry <= 1 is accepted.
+    if (Mask[0] != 1 || Mask[1] > 1)
+      return nullptr;
+
+    return stripBitCast(
+        getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI), MRI);
+  }
+  default:
+    return nullptr;
+  }
+}
+
+/// Compute the source register and source-modifier bits for one operand of a
+/// mad-mix / fma-mix instruction.
+///
+/// The operand is first run through selectVOP3ModsImpl. If the resulting
+/// source is defined (ignoring copies) by a G_FPEXT, the extension is folded:
+/// the pre-extension value becomes the source, OP_SEL_1 is set, and OP_SEL_0
+/// is additionally set when that value is an extract of the high 16 bits of a
+/// dword. Modifiers found beneath the extension are merged into the result.
+///
+/// \param Root the operand to match.
+/// \param [out] Matched set to true iff a G_FPEXT was folded.
+/// \returns the selected source register and its modifier bits.
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
+                                                     bool &Matched) const {
+  Matched = false;
+
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
+
+  MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
+
+  // Nothing to fold unless the value comes from an extension.
+  if (MI->getOpcode() != AMDGPU::G_FPEXT)
+    return {Src, Mods};
+
+  // Step onto the value feeding the extension.
+  MachineOperand *MO = &MI->getOperand(1);
+  Src = MO->getReg();
+  MI = getDefIgnoringCopies(Src, *MRI);
+
+  assert(MRI->getType(Src) == LLT::scalar(16));
+
+  // See through bitcasts.
+  // FIXME: Would be nice to use stripBitCast here.
+  if (MI->getOpcode() == AMDGPU::G_BITCAST) {
+    MO = &MI->getOperand(1);
+    Src = MO->getReg();
+    MI = getDefIgnoringCopies(Src, *MRI);
+  }
+
+  // Merges modifiers found on the current operand (*MO) into Mods and
+  // advances Src / MI past them. It reads MO at call time, so it observes any
+  // update made to MO between invocations.
+  const auto FoldInnerMods = [&]() {
+    // Be careful about folding modifiers if we already have an abs. fneg is
+    // applied last, so we don't want to apply an earlier fneg.
+    if ((Mods & SISrcMods::ABS) != 0)
+      return;
+
+    unsigned InnerMods;
+    std::tie(Src, InnerMods) = selectVOP3ModsImpl(*MO);
+    MI = getDefIgnoringCopies(Src, *MRI);
+
+    // An inner negate toggles the outer one; an inner abs is simply added.
+    if ((InnerMods & SISrcMods::NEG) != 0)
+      Mods ^= SISrcMods::NEG;
+
+    if ((InnerMods & SISrcMods::ABS) != 0)
+      Mods |= SISrcMods::ABS;
+  };
+
+  FoldInnerMods();
+
+  // op_sel/op_sel_hi decide the source type and source.
+  // A set op_sel_hi requests a conversion from fp16 for that source; a set
+  // op_sel picks the high half of the source register.
+  Mods |= SISrcMods::OP_SEL_1;
+
+  if (MachineInstr *HiEltSrc = isExtractHiElt(MI, *MRI)) {
+    Mods |= SISrcMods::OP_SEL_0;
+    MI = HiEltSrc;
+    MO = &MI->getOperand(0);
+    Src = MO->getReg();
+
+    // The value being extracted from may carry modifiers of its own.
+    FoldInnerMods();
+  }
+
+  Matched = true;
+  return {Src, Mods};
+}
+
+/// Complex-operand matcher entry point for mad-mix / fma-mix sources.
+///
+/// Delegates to selectVOP3PMadMixModsImpl and always produces renderers; the
+/// "matched" flag reported by the implementation is not consulted here.
+///
+/// \returns two renderers, in order: the source register, then the source
+/// modifiers immediate.
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
+  bool Matched;
+  const std::pair<Register, unsigned> Selected =
+      selectVOP3PMadMixModsImpl(Root, Matched);
+  const Register Src = Selected.first;
+  const unsigned Mods = Selected.second;
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+  }};
+}
+
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const;
bool selectG_EXTRACT(MachineInstr &I) const;
+ bool selectG_FMA_FMAD(MachineInstr &I) const;
bool selectG_MERGE_VALUES(MachineInstr &I) const;
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
bool selectG_BUILD_VECTOR(MachineInstr &I) const;
ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const;
+ std::pair<Register, unsigned> selectVOP3PMadMixModsImpl(MachineOperand &Root,
+ bool &Matched) const;
+ ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const;
+
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
$src1_modifiers, $src1,
$src2_modifiers, $src2,
DSTCLAMP.NONE,
- $elt0))
+ VGPR_32:$elt0))
>;
def : GCNPat <
$src1_modifiers, $src1,
$src2_modifiers, $src2,
DSTCLAMP.ENABLE,
- $elt0))
+ VGPR_32:$elt0))
>;
def : GCNPat <
define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float %z, half %u, half %v) {
; GFX9-DENORM-LABEL: test_f16_f32_add_fma_ext_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX9-DENORM-NEXT: v_mad_f32 v2, v3, v4, v2
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, v4, v2 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2
; GFX9-DENORM-NEXT: ; return to shader part epilog
; GFX10-LABEL: test_f16_f32_add_fma_ext_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX10-NEXT: v_fmac_f32_e32 v3, v0, v1
-; GFX10-NEXT: v_add_f32_e32 v0, v3, v2
+; GFX10-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10-CONTRACT-LABEL: test_f16_f32_add_fma_ext_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v0, v1
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v3, v2
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-CONTRACT-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_f32_add_fma_ext_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v3, v0, v1
-; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v3, v2
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul half %u, %v
define amdgpu_vs float @test_f16_f32_add_ext_fma_mul(half %x, half %y, float %z, half %u, half %v) {
; GFX9-DENORM-LABEL: test_f16_f32_add_ext_fma_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v5, v0
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v3
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v4
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v3, v2
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v5, v1
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, v4, v2 op_sel_hi:[1,1,0]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_f16_f32_add_ext_fma_mul:
define amdgpu_vs float @test_f16_f32_add_fma_ext_mul_rhs(float %x, float %y, float %z, half %u, half %v) {
; GFX9-DENORM-LABEL: test_f16_f32_add_fma_ext_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v3, v4
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v1, v2
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_f16_f32_add_fma_ext_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX10-NEXT: v_fmac_f32_e32 v3, v1, v2
-; GFX10-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX10-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10-CONTRACT-LABEL: test_f16_f32_add_fma_ext_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v1, v2
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-CONTRACT-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_f32_add_fma_ext_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v3, v1, v2
-; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v1
; GFX10-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul half %u, %v
define amdgpu_vs float @test_f16_f32_add_ext_fma_mul_rhs(float %x, half %y, half %z, half %u, half %v) {
; GFX9-DENORM-LABEL: test_f16_f32_add_ext_fma_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v3, v4
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v1, v2
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_f16_f32_add_ext_fma_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX9-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v14, v0, v4
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v12, v1, v5
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v15, v2, v6
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v13, v3, v7
-; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v14, v8
-; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v12, v9
-; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v15, v10
-; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v13, v11
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v8
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v9
+; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v10
+; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v11
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_v4f16_v4f32_add_fma_ext_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX10-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX10-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX10-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX10-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-NEXT: v_fmac_f32_e32 v14, v0, v4
-; GFX10-NEXT: v_fmac_f32_e32 v12, v1, v5
-; GFX10-NEXT: v_fmac_f32_e32 v15, v2, v6
-; GFX10-NEXT: v_fmac_f32_e32 v13, v3, v7
-; GFX10-NEXT: v_add_f32_e32 v0, v14, v8
-; GFX10-NEXT: v_add_f32_e32 v1, v12, v9
-; GFX10-NEXT: v_add_f32_e32 v2, v15, v10
-; GFX10-NEXT: v_add_f32_e32 v3, v13, v11
+; GFX10-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v8
+; GFX10-NEXT: v_add_f32_e32 v1, v1, v9
+; GFX10-NEXT: v_add_f32_e32 v2, v2, v10
+; GFX10-NEXT: v_add_f32_e32 v3, v3, v11
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10-CONTRACT-LABEL: test_v4f16_v4f32_add_fma_ext_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v14, v0, v4
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v12, v1, v5
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v15, v2, v6
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v13, v3, v7
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v14, v8
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v12, v9
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v15, v10
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v13, v11
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v8
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v9
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v10
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v11
; GFX10-CONTRACT-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_v4f16_v4f32_add_fma_ext_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX10-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v14, v0, v4
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v12, v1, v5
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v15, v2, v6
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v13, v3, v7
-; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v14, v8
-; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v12, v9
-; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v15, v10
-; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v13, v11
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v8
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v9
+; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v10
+; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v11
; GFX10-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul <4 x half> %u, %v
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX9-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v14, v4, v8
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v12, v5, v9
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v15, v6, v10
-; GFX9-DENORM-NEXT: v_mac_f32_e32 v13, v7, v11
-; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v14
-; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v12
-; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v15
-; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v13
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v4
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v5
+; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v6
+; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v7
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX10-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX10-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX10-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX10-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-NEXT: v_fmac_f32_e32 v14, v4, v8
-; GFX10-NEXT: v_fmac_f32_e32 v12, v5, v9
-; GFX10-NEXT: v_fmac_f32_e32 v15, v6, v10
-; GFX10-NEXT: v_fmac_f32_e32 v13, v7, v11
-; GFX10-NEXT: v_add_f32_e32 v0, v0, v14
-; GFX10-NEXT: v_add_f32_e32 v1, v1, v12
-; GFX10-NEXT: v_add_f32_e32 v2, v2, v15
-; GFX10-NEXT: v_add_f32_e32 v3, v3, v13
+; GFX10-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v4
+; GFX10-NEXT: v_add_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_add_f32_e32 v2, v2, v6
+; GFX10-NEXT: v_add_f32_e32 v3, v3, v7
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10-CONTRACT-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v14, v4, v8
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v12, v5, v9
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v15, v6, v10
-; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v13, v7, v11
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v14
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v12
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v15
-; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v13
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v4
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v5
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v6
+; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v7
; GFX10-CONTRACT-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14
; GFX10-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v14, v4, v8
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v12, v5, v9
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v15, v6, v10
-; GFX10-DENORM-NEXT: v_fmac_f32_e32 v13, v7, v11
-; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v14
-; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v12
-; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v15
-; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v13
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v4
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v5
+; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v6
+; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v7
; GFX10-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul <4 x half> %u, %v
define amdgpu_vs float @test_f16_f32_add_ext_mul(half inreg %x, half inreg %y, float inreg %z) {
; GFX9-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul:
; GFX9-FAST-DENORM: ; %bb.0: ; %.entry
-; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0
-; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1
-; GFX9-FAST-DENORM-NEXT: v_mad_f32 v0, v0, v1, s2
+; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-FAST-DENORM-NEXT: v_mad_mix_f32 v0, s0, v0, v1 op_sel_hi:[1,1,0]
; GFX9-FAST-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul:
; GFX10-FAST-DENORM: ; %bb.0: ; %.entry
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v1, s2
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s1, v0 op_sel_hi:[1,1,0]
; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul fast half %x, %y
define amdgpu_vs float @test_f16_f32_add_ext_mul_rhs(half inreg %x, half inreg %y, float inreg %z) {
; GFX9-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul_rhs:
; GFX9-FAST-DENORM: ; %bb.0: ; %.entry
-; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0
-; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1
-; GFX9-FAST-DENORM-NEXT: v_mad_f32 v0, v0, v1, s2
+; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-FAST-DENORM-NEXT: v_mad_mix_f32 v0, s0, v0, v1 op_sel_hi:[1,1,0]
; GFX9-FAST-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul_rhs:
; GFX10-FAST-DENORM: ; %bb.0: ; %.entry
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v1, s2
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s1, v0 op_sel_hi:[1,1,0]
; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul fast half %x, %y
;
; GFX10-FAST-DENORM-LABEL: test_5xf16_5xf32_add_ext_mul:
; GFX10-FAST-DENORM: ; %bb.0: ; %.entry
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s11, s0, 16
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s1, 16
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s3, 16
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s4, 16
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s11
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v2, s1
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v3, s12
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v4, s2
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v5, s3
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v6, s13
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v7, s4
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v8, s14
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v9, s5
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v5, s6
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v1, v1, v6, s7
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v2, v2, v7, s8
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v3, v3, v8, s9
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v4, v4, v9, s10
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s6
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s7
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s8
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s0, s3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s1, s4, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0]
; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul fast <5 x half> %x, %y
;
; GFX10-FAST-DENORM-LABEL: test_6xf16_6xf32_add_ext_mul_rhs:
; GFX10-FAST-DENORM: ; %bb.0: ; %.entry
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s0, 16
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s1, 16
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s2, 16
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v2, s1
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v4, s2
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s0, s3, 16
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s1, s4, 16
-; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s2, s5, 16
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s12
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v3, s13
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v5, s14
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v6, s3
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v7, s0
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v8, s4
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v9, s1
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v10, s5
-; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v11, s2
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v6, s6
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v1, v1, v7, s7
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v2, v2, v8, s8
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v3, v3, v9, s9
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v4, v4, v10, s10
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v5, v5, v11, s11
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s6
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s7
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s8
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10
+; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v5, s11
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s0, s3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s1, s4, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0]
+; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v5, s2, s5, v5 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul fast <6 x half> %x, %y
define amdgpu_vs float @test_f16_to_f32_sub_ext_mul(half %x, half %y, float %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast half %x, %y
define amdgpu_vs float @test_f16_to_f32_sub_ext_mul_rhs(float %x, half %y, half %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul fast half %y, %z
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4
-; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5
-; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6
-; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v0, v2, -v4 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v3, -v6 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v4
+; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast <4 x half> %x, %y
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0
-; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1
-; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2
-; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v6, v0 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v7, v2 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
.entry:
%a = fmul fast <4 x half> %y, %z
define amdgpu_vs float @test_f16_to_f32_sub_ext_neg_mul(half %x, half %y, float %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1
-; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast half %x, %y
define amdgpu_vs float @test_f16_to_f32_sub_neg_ext_mul(half %x, half %y, float %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1
-; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast half %x, %y
define amdgpu_vs float @test_f16_to_f32_sub_ext_neg_mul2(float %x, half %y, half %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul2:
; GFX9-DENORM: ; %bb.0: ; %entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2
-; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul2:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2
-; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast half %y, %z
define amdgpu_vs float @test_f16_to_f32_sub_neg_ext_mul2(float %x, half %y, half %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul2:
; GFX9-DENORM: ; %bb.0: ; %entry
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2
-; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT: ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul2:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2
-; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast half %y, %z
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4
-; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5
-; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6
-; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast <4 x half> %x, %y
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4
-; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5
-; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6
-; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast <4 x half> %x, %y
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul2:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0
-; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1
-; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2
-; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast <4 x half> %y, %z
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul2:
; GFX10-DENORM: ; %bb.0: ; %entry
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6
-; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7
-; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0
-; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1
-; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2
-; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6
+; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT: ; return to shader part epilog
entry:
%a = fmul fast <4 x half> %y, %z
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=VI %s
-; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=CI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,SDAG-GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
+
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GISEL-GFX9 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 {
; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v0
-; CI-NEXT: v_mov_b32_e32 v0, 1.0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SDAG-CI-NEXT: v_mov_b32_e32 v0, 1.0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: s_movk_i32 s4, 0x3c00
+; GISEL-VI-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_e32 v0, s4, v0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3c00
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v0
-; CI-NEXT: v_mov_b32_e32 v0, v3
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SDAG-CI-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-CI-NEXT: v_mov_b32_e32 v0, v3
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
}
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 {
-; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
}
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 {
-; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; VI-NEXT: flat_store_short v[0:1], v0
-; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: s_mov_b32 s7, 0xf000
-; CI-NEXT: s_mov_b32 s6, -1
-; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
-; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
-; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-VI-NEXT: flat_store_short v[0:1], v0
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0)
+; SDAG-VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: s_mov_b32 s7, 0xf000
+; SDAG-CI-NEXT: s_mov_b32 s6, -1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
+; SDAG-CI-NEXT: buffer_store_short v0, off, s[4:7], 0
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0)
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: flat_store_short v[0:1], v0
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0)
+; GISEL-VI-NEXT: v_max_f16_e64 v0, v0, v0 clamp
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: s_mov_b32 s6, -1
+; GISEL-CI-NEXT: s_mov_b32 s7, 0xf000
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-CI-NEXT: buffer_store_short v0, off, s[4:7], 0
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0)
+; GISEL-CI-NEXT: v_max_f32_e32 v1, v2, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone speculatable }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CI: {{.*}}
+; VI: {{.*}}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX900 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=VI %s
-; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=CI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
+; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
+
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
-; GFX906-LABEL: mixlo_simple:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GFX900-LABEL: mixlo_simple:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
+; GFX906-LABEL: mixlo_simple:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: mixlo_simple:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: mixlo_simple:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: mixlo_simple:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: mixlo_simple:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
%cvt.result = fptrunc float %result to half
ret half %cvt.result
}
define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
-; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
+; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
-; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
+; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
-; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
+; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mac_f32_e32 v2, v0, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 {
-; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
+; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; operation only clobbers relevant lane.
define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+; GFX900-LABEL: v_mad_mix_v2f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
; GFX906-LABEL: v_mad_mix_v2f32:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v2f32:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v2f32:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v2f32:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v5, v3, v4
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e32 v1, v2
-; VI-NEXT: v_or_b32_e32 v0, v1, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_v2f32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v2f32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_mac_f32_e32 v4, v0, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v4
-; CI-NEXT: v_mac_f32_e32 v5, v1, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-VI-LABEL: v_mad_mix_v2f32:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2.ext = fpext <2 x half> %src2 to <2 x float>
}
define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v3f32:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_mov_b32_e32 v0, v3
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v3f32:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v3f32:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v3f32:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v3f32:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; VI-NEXT: v_mac_f32_e32 v8, v6, v7
-; VI-NEXT: v_mac_f32_e32 v4, v0, v2
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; VI-NEXT: v_cvt_f16_f32_e32 v2, v4
-; VI-NEXT: v_cvt_f16_f32_e32 v1, v5
-; VI-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v3f32:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7
+; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v3f32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5
+; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4
+; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v3f32:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v3f32:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v3f32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_mac_f32_e32 v7, v1, v4
-; CI-NEXT: v_mac_f32_e32 v6, v0, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v6
-; CI-NEXT: v_mac_f32_e32 v8, v2, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v8
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-VI-LABEL: v_mad_mix_v3f32:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v8
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v5
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v3f32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3
+; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4
+; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v8
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <3 x half> %src0 to <3 x float>
%src1.ext = fpext <3 x half> %src1 to <3 x float>
%src2.ext = fpext <3 x half> %src2 to <3 x float>
}
define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v4f32:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_mov_b32_e32 v0, v7
-; GFX906-NEXT: v_mov_b32_e32 v1, v6
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v4f32:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v7
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v6
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v4f32:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v4f32:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v7
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v6
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v4f32:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; VI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; VI-NEXT: v_mac_f32_e32 v10, v6, v8
-; VI-NEXT: v_mac_f32_e32 v11, v7, v9
-; VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; VI-NEXT: v_mac_f32_e32 v4, v0, v2
-; VI-NEXT: v_cvt_f16_f32_sdwa v1, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e32 v2, v5
-; VI-NEXT: v_cvt_f16_f32_e32 v3, v4
-; VI-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-NEXT: v_or_b32_e32 v0, v3, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v4f32:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9
+; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8
+; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v3, v5
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
+; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v4f32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7
+; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6
+; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, v5
+; SDAG-CI-NEXT: v_mac_f32_e32 v8, v0, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v4f32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v9, v9
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v10, v10
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v11, v11
-; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v10, v10
-; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v11, v11
-; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_mac_f32_e32 v10, v2, v6
-; CI-NEXT: v_mac_f32_e32 v9, v1, v5
-; CI-NEXT: v_mac_f32_e32 v8, v0, v4
-; CI-NEXT: v_mac_f32_e32 v11, v3, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v8
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v9
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v10
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v11
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-GFX900-LABEL: v_mad_mix_v4f32:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v4f32:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v7
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v4f32:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8
+; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v4
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v10
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v5
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v11
+; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v4f32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4
+; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6
+; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v10
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v11
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <4 x half> %src0 to <4 x float>
%src1.ext = fpext <4 x half> %src1 to <4 x float>
%src2.ext = fpext <4 x half> %src2 to <4 x float>
; FIXME (DAG): Fold clamp
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v5, v3, v4
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp
-; VI-NEXT: v_or_b32_e32 v0, v1, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_mac_f32_e32 v5, v1, v3
-; CI-NEXT: v_mac_f32_e32 v4, v0, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v5
-; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v4
-; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
+; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2.ext = fpext <2 x half> %src2 to <2 x float>
}
; FIXME (DAG): Should be packed into 2 registers per argument?
+; FIXME (GISel): V_PK_MAX clamp could be folded into mixlo
define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
-; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
-; GFX906-NEXT: v_mov_b32_e32 v0, v3
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
-; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; VI-NEXT: v_mac_f32_e32 v8, v6, v7
-; VI-NEXT: v_mac_f32_e32 v4, v0, v2
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp
-; VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp
-; VI-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7
+; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3
+; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4
+; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_mac_f32_e32 v8, v2, v5
-; CI-NEXT: v_mac_f32_e32 v6, v0, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v8
-; CI-NEXT: v_mac_f32_e32 v7, v1, v4
-; CI-NEXT: v_cvt_f32_f16_e64 v2, v0 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v7
-; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v6
-; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v4 clamp
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v8 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v5 clamp
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6
+; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4
+; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v8
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2
+; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_max_f32_e32 v2, v3, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v3
+; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v3
+; GISEL-CI-NEXT: v_min_f32_e32 v2, v2, v3
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <3 x half> %src0 to <3 x float>
%src1.ext = fpext <3 x half> %src1 to <3 x float>
%src2.ext = fpext <3 x half> %src2 to <3 x float>
}
define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
+; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9
+; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v11 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v10 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp
+; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0
+; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; VI-NEXT: v_mac_f32_e32 v10, v6, v8
-; VI-NEXT: v_mac_f32_e32 v11, v7, v9
-; VI-NEXT: v_mac_f32_e32 v4, v0, v2
-; VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v10 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_sdwa v1, v11 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp
-; VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp
-; VI-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-NEXT: v_or_b32_e32 v1, v3, v1
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_mac_f32_e32 v8, v0, v4
+; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, v5
+; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6
+; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v3, v3 clamp
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8
+; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v4 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v10 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v11 clamp
+; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v11, v11
-; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v11, v11
-; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v10, v10
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v10, v10
-; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v9, v9
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_mac_f32_e32 v11, v3, v7
-; CI-NEXT: v_mac_f32_e32 v8, v0, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v11
-; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_mac_f32_e32 v10, v2, v6
-; CI-NEXT: v_cvt_f32_f16_e64 v3, v0 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v10
-; CI-NEXT: v_mac_f32_e32 v9, v1, v5
-; CI-NEXT: v_cvt_f32_f16_e64 v2, v0 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v9
-; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v8
-; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4
+; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6
+; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v10
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v4, v11
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2
+; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_max_f32_e32 v3, v3, v2
+; GISEL-CI-NEXT: v_max_f32_e32 v2, v4, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v2
+; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v5
+; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v5
+; GISEL-CI-NEXT: v_min_f32_e32 v2, v3, v5
+; GISEL-CI-NEXT: v_min_f32_e32 v3, v4, v5
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <4 x half> %src0 to <4 x float>
%src1.ext = fpext <4 x half> %src1 to <4 x float>
%src2.ext = fpext <4 x half> %src2 to <4 x float>
ret <4 x half> %clamp
}
+; FIXME (GISel): Packed vector handling isn't great for now, so we don't end up with
+; a build_vector from which the mixhi can be selected. More specifically, the issue is how
+; insert_vector_elt is legalized (e.g. into bitwise ops instead of a shuffle/build_vector).
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_mov_b32_e32 v0, v3
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v5, v3, v4
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp
-; VI-NEXT: v_or_b32_e32 v0, v1, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_max_f16_e64 v0, v3, v3 clamp
+; GISEL-GFX900-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff0000
+; GISEL-GFX900-NEXT: v_and_or_b32 v0, v4, v1, v0
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_mac_f32_e32 v5, v1, v3
-; CI-NEXT: v_mac_f32_e32 v4, v0, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v4
-; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v4, v3
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_max_f16_e64 v0, v3, v3 clamp
+; GISEL-GFX906-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff0000
+; GISEL-GFX906-NEXT: v_and_or_b32 v0, v4, v1, v0
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_max_f16_e64 v1, v0, v0 clamp
+; GISEL-VI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
+; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2.ext = fpext <2 x half> %src2 to <2 x float>
}
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_mov_b32_e32 v0, v3
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mac_f32_e32 v5, v3, v4
-; VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e32 v1, v2
-; VI-NEXT: v_or_b32_e32 v0, v1, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, 16
+; GISEL-GFX900-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff
+; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v1, v0
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, 16
+; GISEL-GFX906-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
+; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v1, v0
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_mac_f32_e32 v4, v0, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v4
-; CI-NEXT: v_mac_f32_e32 v5, v1, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
+; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_max_f16_sdwa v1, v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
+; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2.ext = fpext <2 x half> %src2 to <2 x float>
; FIXME (DAG): Should be able to use mixlo/mixhi
define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3
-; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX906-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v1
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3
-; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v1
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v2f32_clamp_precvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp
-; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
-; VI-NEXT: v_cvt_f16_f32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; VI-NEXT: v_or_b32_e32 v0, v0, v1
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp
+; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
+; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v2f32_clamp_precvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v1, v0
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp
+; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_precvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
+; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2.ext = fpext <2 x half> %src2 to <2 x float>
; FIXME (DAG): Handling undef 4th component
define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_cvt_f16_f32_e32 v2, v3
-; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX906-NEXT: v_pack_b32_f16 v0, v0, v2
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v2
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v3
-; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-NEXT: v_pack_b32_f16 v0, v0, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v2
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v3f32_clamp_precvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp
-; VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
-; VI-NEXT: v_cvt_f16_f32_sdwa v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
-; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; VI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; VI-NEXT: v_or_b32_e32 v0, v0, v2
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp
+; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v2
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp
+; SDAG-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp
+; SDAG-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v3f32_clamp_precvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp
-; CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v2, v0
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
+; GISEL-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v6
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_precvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp
+; GISEL-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp
+; GISEL-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <3 x half> %src0 to <3 x float>
%src1.ext = fpext <3 x half> %src1 to <3 x float>
%src2.ext = fpext <3 x half> %src2 to <3 x float>
}
define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
-; GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6
-; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX906-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX906-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX906-NEXT: v_pack_b32_f16 v0, v0, v3
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v3
+; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, v2
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6
-; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX900-NEXT: v_pack_b32_f16 v0, v0, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v3
+; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, v2
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_v4f32_clamp_precvt:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; VI-NEXT: v_mad_f32 v6, v6, v8, v10 clamp
-; VI-NEXT: v_mad_f32 v7, v7, v9, v11 clamp
-; VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
-; VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
-; VI-NEXT: v_cvt_f16_f32_sdwa v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_sdwa v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; VI-NEXT: v_or_b32_e32 v1, v1, v2
-; VI-NEXT: v_or_b32_e32 v0, v0, v3
-; VI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-VI-NEXT: v_mad_f32 v7, v7, v9, v10 clamp
+; SDAG-VI-NEXT: v_mad_f32 v6, v6, v8, v11 clamp
+; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
+; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v3
+; SDAG-VI-NEXT: v_or_b32_e32 v1, v1, v2
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp
+; SDAG-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp
+; SDAG-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp
+; SDAG-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v6
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v3, v0
+; GISEL-GFX900-NEXT: v_pack_b32_f16 v1, v2, v1
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v6
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v3, v0
+; GISEL-GFX906-NEXT: v_pack_b32_f16 v1, v2, v1
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mad_f32 v6, v6, v8, v10 clamp
+; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
+; GISEL-VI-NEXT: v_mad_f32 v2, v7, v9, v11 clamp
+; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v6
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v4f32_clamp_precvt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v11, v11
-; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v10, v10
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v9, v9
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v8, v8
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v11, v11
-; CI-NEXT: v_cvt_f32_f16_e32 v7, v7
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v10, v10
-; CI-NEXT: v_cvt_f32_f16_e32 v6, v6
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp
-; CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp
-; CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp
-; CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: s_setpc_b64 s[30:31]
+; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_precvt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp
+; GISEL-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp
+; GISEL-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp
+; GISEL-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <4 x half> %src0 to <4 x float>
%src1.ext = fpext <4 x half> %src1 to <4 x float>
%src2.ext = fpext <4 x half> %src2 to <4 x float>
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone speculatable }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CI: {{.*}}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900 %s
-; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CIVI,VI %s
-; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CIVI,CI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
+; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
+
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v1, v3, v5
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.hi = extractelement <2 x half> %src0, i32 1
%src1.hi = extractelement <2 x half> %src1, i32 1
%src2.hi = extractelement <2 x half> %src2, i32 1
}
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; GFX900-LABEL: v_mad_mix_v2f32:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_v2f32:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_mov_b32_e32 v1, v3
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: v_mad_mix_v2f32:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v4, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v6, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
-; VI-NEXT: v_mac_f32_e32 v1, v3, v5
-; VI-NEXT: v_mac_f32_e32 v0, v4, v6
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_v2f32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v6
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v6, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v6
-; CI-NEXT: v_mac_f32_e32 v3, v1, v5
-; CI-NEXT: v_mov_b32_e32 v1, v3
-; CI-NEXT: v_mac_f32_e32 v0, v4, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mix_v2f32:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5
+; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v2f32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6
+; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5
+; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3
+; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v2f32:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5
+; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2.ext = fpext <2 x half> %src2 to <2 x float>
; VI-NEXT: v_mov_b32_e32 v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_v2f32_shuffle:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v4, v5
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v5, v1
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v4
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v5
-; CI-NEXT: v_cvt_f32_f16_e32 v5, v0
-; CI-NEXT: v_mad_f32 v0, v4, v2, v1
-; CI-NEXT: v_mac_f32_e32 v1, v5, v3
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v5
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v1
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v4
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v5
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
+; SDAG-CI-NEXT: v_mad_f32 v0, v4, v2, v1
+; SDAG-CI-NEXT: v_mac_f32_e32 v1, v5, v3
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5
+; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3
+; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1
+; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
%src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
%src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mad_f32 v0, -v0, v1, v2
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, -v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; VI-NEXT: v_mad_f32 v0, v0, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; VI-NEXT: v_mad_f32 v0, v0, v1, -v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, -v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.neg = fneg float %src2
; VI-NEXT: v_mad_f32 v0, v0, v1, |v2|
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, |v2|
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.abs = call float @llvm.fabs.f32(float %src2)
; VI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2|
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.abs = call float @llvm.fabs.f32(float %src2)
; inline immediate.
define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
-; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_mov_b32 s4, 1.0
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: s_mov_b32 s4, 1.0
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; VI: ; %bb.0:
; VI-NEXT: v_mad_f32 v0, v0, v1, 1.0
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 1.0
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 1.0
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
}
define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
-; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_mov_b32 s4, 0.15915494
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: s_mov_b32 s4, 0.15915494
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; VI: ; %bb.0:
; VI-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0.15915494
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0.15915494
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e22f983
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
; f32 1/2pi = 0x3e22f983
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
-; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_mov_b32 s4, 0x3e230000
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: s_mov_b32 s4, 0x3e230000
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x3e230000
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x3e230000
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x3e230000
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e230000
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2 = fpext half 0xH3118 to float
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
-; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_mov_b32 s4, 0x367c0000
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: s_mov_b32 s4, 0x367c0000
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x367c0000
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x367c0000
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x367c0000
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x367c0000
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x367c0000
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x367c0000
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2 = fpext half 0xH003F to float
}
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
-; GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_mov_b32 s4, 1.0
-; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: s_mov_b32 s4, 1.0
-; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX906-NEXT: v_mov_b32_e32 v1, v2
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: v_mad_mix_v2f32_f32imm1:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_mad_f32 v0, v0, v3, 1.0
-; VI-NEXT: v_mad_f32 v1, v2, v1, 1.0
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_v2f32_f32imm1:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
-; CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 1.0
+; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 1.0
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
+; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: s_mov_b32 s4, 1.0
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: s_mov_b32 s4, 1.0
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 1.0
+; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 1.0
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0
+; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
}
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
-; GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_mov_b32 s4, 0x3e230000
-; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: s_mov_b32 s4, 0x3e230000
-; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX906-NEXT: v_mov_b32_e32 v1, v2
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
-; VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
-; VI-NEXT: v_mac_f32_e32 v1, v2, v4
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v1
-; CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
-; CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
-; CI-NEXT: v_mac_f32_e32 v1, v4, v3
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
+; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
+; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000
+; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: s_mov_b32 s4, 0x3e230000
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: s_mov_b32 s4, 0x3e230000
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: s_mov_b32 s4, 0x3e230000
+; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, s4
+; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, s4
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e230000
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4
+; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
}
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
-; GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_mov_b32 s4, 0.15915494
-; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: s_mov_b32 s4, 0.15915494
-; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
-; GFX906-NEXT: v_mov_b32_e32 v1, v2
-; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v3, v1
-; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
-; VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_cvt_f32_f16_e32 v4, v1
-; CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
-; CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
-; CI-NEXT: v_mac_f32_e32 v1, v4, v3
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-GFX900: ; %bb.0:
+; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-GFX906: ; %bb.0:
+; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
+; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
+; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1
+; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983
+; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983
+; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; GISEL-GFX900: ; %bb.0:
+; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT: s_mov_b32 s4, 0.15915494
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; GISEL-GFX906: ; %bb.0:
+; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT: s_mov_b32 s4, 0.15915494
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
+; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
+; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e22f983
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4
+; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
%src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.hi = extractelement <2 x half> %src0, i32 1
%src1.hi = extractelement <2 x half> %src1, i32 1
%src2.hi = extractelement <2 x half> %src2, i32 1
; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; CIVI-LABEL: no_mix_simple:
-; CIVI: ; %bb.0:
-; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CIVI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CIVI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: no_mix_simple:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mad_f32 v0, v0, v1, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; CI-LABEL: no_mix_simple:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; CI-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
ret float %result
}
; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; CIVI-LABEL: no_mix_simple_fabs:
-; CIVI: ; %bb.0:
-; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2
-; CIVI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: no_mix_simple_fabs:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; CI-LABEL: no_mix_simple_fabs:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; CI-NEXT: s_setpc_b64 s[30:31]
%src0.fabs = call float @llvm.fabs.f32(float %src0)
%result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
ret float %result
; VI-NEXT: v_add_f32_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_fma_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; VI-NEXT: v_add_f32_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_fma_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
; VI-NEXT: v_add_f32_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mul_f32_e32 v0, v0, v1
-; CI-NEXT: v_add_f32_e32 v0, v0, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
+; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; VI-NEXT: v_add_f32_e32 v0, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mul_f32_e32 v0, v0, v1
-; CI-NEXT: v_add_f32_e32 v0, v0, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1
+; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%mul = fmul float %src0.ext, %src1.ext
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%src2.ext = fpext half %src2 to float
; VI-NEXT: v_mad_f32 v0, v0, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float
%mul = fmul contract float %src0.ext, %src1.ext
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mad_f32 v0, -v0, v1, v2
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT: v_mad_f32 v0, -v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
%src0 = extractelement <2 x half> %src0.arg.bc, i32 0
%src0.neg = fneg half %src0
; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
%src0 = extractelement <2 x half> %src0.arg.bc, i32 1
%src0.neg = fneg half %src0
; VI-NEXT: v_mac_f32_e32 v0, v3, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, |v0|
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
%src0 = extractelement <2 x half> %src0.arg.bc, i32 1
%src0.abs = call half @llvm.fabs.f16(half %src0)
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mad_f32 v0, -v0, v1, v2
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
%fneg = fneg <2 x half> %src0.arg.bc
%src0 = extractelement <2 x half> %fneg, i32 1
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v2
-; VI-NEXT: v_mac_f32_e32 v0, v3, v1
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-VI-NEXT: v_mac_f32_e32 v0, v3, v1
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
%src0 = extractelement <2 x half> %fabs, i32 1
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT: v_mad_f32 v0, -v0, v1, v2
-; VI-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
-; CI: ; %bb.0:
-; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
-; CI-NEXT: v_mad_f32 v0, v0, v1, v2
-; CI-NEXT: s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; SDAG-CI: ; %bb.0:
+; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
+; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
+; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; GISEL-CI: ; %bb.0:
+; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT: v_or_b32_e32 v0, 0x80008000, v0
+; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
%fneg.fabs = fneg <2 x half> %fabs