From: Petar Avramovic Date: Mon, 18 Jul 2022 12:48:14 +0000 (+0200) Subject: [AMDGPU][MC][GFX11] AsmParser for op_sel for VOP3 dpp opcodes X-Git-Tag: upstream/15.0.7~1335 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c287bc4841074b834137c1098ff3bd11fb93dd51;p=platform%2Fupstream%2Fllvm.git [AMDGPU][MC][GFX11] AsmParser for op_sel for VOP3 dpp opcodes Parse op_sel for *_e64_dpp VOP3 opcodes. Depends on D129637 and setting of VOP3_OPSEL in dpp pseudos. Differential Revision: https://reviews.llvm.org/D129767 --- diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 201a7ff..8b2e51f 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1739,6 +1739,8 @@ public: void cvtVOP3(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); void cvtVOPD(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); @@ -8024,10 +8026,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) return MatchOperand_NoMatch; } -void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { - cvtVOP3P(Inst, Operands); - +// Determines which bit DST_OP_SEL occupies in the op_sel operand according to +// the number of src operands present, then copies that bit into src0_modifiers. +void cvtVOP3DstOpSelOnly(MCInst &Inst) { int Opc = Inst.getOpcode(); + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx == -1) + return; int SrcNum; const int Ops[] = { AMDGPU::OpName::src0, @@ -8038,7 +8043,6 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) ++SrcNum); assert(SrcNum > 0); - int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); if ((OpSel & (1 << SrcNum)) != 0) { @@ -8048,6 +8052,18 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) } } +void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, + const OperandVector &Operands) { + cvtVOP3P(Inst, Operands); + cvtVOP3DstOpSelOnly(Inst); +} + +void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { + cvtVOP3P(Inst, Operands, OptionalIdx); + cvtVOP3DstOpSelOnly(Inst); +} + static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { // 1. This operand is input modifiers return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS @@ -8802,6 +8818,8 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bo } if (Desc.TSFlags & SIInstrFlags::VOP3P) cvtVOP3P(Inst, Operands, OptionalIdx); + else if (Desc.TSFlags & SIInstrFlags::VOP3) + cvtVOP3OpSel(Inst, Operands, OptionalIdx); else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); } diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 0d54e42..08f7270 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -18500,3 +18500,276 @@ v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 51fd58c1..f376a13 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -7037,3 +7037,276 @@ v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x1d,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x5a,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x59,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x4c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x4f,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x49,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]