From: Joe Nash Date: Wed, 13 Jul 2022 18:14:48 +0000 (-0400) Subject: [AMDGPU][GFX11] Use VGPR_32_Lo128 for VOP1,2,C X-Git-Tag: upstream/17.0.6~33029 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b982ba2a6e0f11340b4e75d1d4eba9ff62a81df7;p=platform%2Fupstream%2Fllvm.git [AMDGPU][GFX11] Use VGPR_32_Lo128 for VOP1,2,C Due to the encoding changes in GFX11, we had a hack in place that disables the use of VGPRs above 128. This patch removes the need for that hack. We introduce a new register class VGPR_32_Lo128 which is used for 16-bit operands of VOP1, VOP2, and VOPC instructions. This register class only has the low 128 VGPRs, but is otherwise identical to VGPR_32. Therefore, 16-bit VOP1, VOP2, and VOPC instructions are correctly limited to use the first 128 VGPRs, while the other instructions can freely use all 256. We introduce new pseudo-instructions used on GFX11 which have the suffix t16 (True 16) to use the VGPR_32_Lo128 register class. Reviewed By: foad, rampitec, #amdgpu Differential Revision: https://reviews.llvm.org/D133723 --- diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 0256e53..dd96eb5 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -343,6 +343,7 @@ public: } bool isVRegWithInputMods() const; + bool isT16VRegWithInputMods() const; bool isSDWAOperand(MVT type) const; bool isSDWAFP16Operand() const; @@ -522,6 +523,10 @@ public: return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); } + bool isVCSrcTB16_Lo128() const { + return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); + } + bool isVCSrcB16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); } @@ -538,6 +543,10 @@ public: return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); } + bool isVCSrcTF16_Lo128() const { + return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, 
MVT::f16); + } + bool isVCSrcF16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); } @@ -554,6 +563,10 @@ public: return isVCSrcF64() || isLiteralImm(MVT::i64); } + bool isVSrcTB16_Lo128() const { + return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); + } + bool isVSrcB16() const { return isVCSrcB16() || isLiteralImm(MVT::i16); } @@ -586,6 +599,10 @@ public: return isVCSrcF64() || isLiteralImm(MVT::f64); } + bool isVSrcTF16_Lo128() const { + return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); + } + bool isVSrcF16() const { return isVCSrcF16() || isLiteralImm(MVT::f16); } @@ -2049,6 +2066,10 @@ bool AMDGPUOperand::isVRegWithInputMods() const { AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); } +bool AMDGPUOperand::isT16VRegWithInputMods() const { + return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID); +} + bool AMDGPUOperand::isSDWAOperand(MVT type) const { if (AsmParser->isVI()) return isVReg32(); @@ -8329,19 +8350,16 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, // we don't allow modifiers for this operand in assembler so src2_modifiers // should be 0. 
if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || - Opc == AMDGPU::V_MAC_F32_e64_gfx10 || - Opc == AMDGPU::V_MAC_F32_e64_vi || + Opc == AMDGPU::V_MAC_F32_e64_gfx10 || Opc == AMDGPU::V_MAC_F32_e64_vi || Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || - Opc == AMDGPU::V_MAC_F16_e64_vi || - Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || + Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || - Opc == AMDGPU::V_FMAC_F32_e64_vi || + Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_vi || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { + Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11) { auto it = Inst.begin(); std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 97a1621..9dec1fe 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -119,6 +119,7 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VGPR_32_Lo128) DECODE_OPERAND_REG(VRegOrLds_32) DECODE_OPERAND_REG(VS_32) DECODE_OPERAND_REG(VS_64) @@ -604,7 +605,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || MI.getOpcode() == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10 || - MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx11)) { + MI.getOpcode() == 
AMDGPU::V_FMAC_F16_t16_e64_gfx11)) { // Insert dummy unused src2_modifiers. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src2_modifiers); @@ -1139,6 +1140,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const { return decodeSrcOp(OPWV232, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32_Lo128(unsigned Val) const { + return createRegOperand(AMDGPU::VGPR_32_Lo128RegClassID, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const { // Some instructions have operand restrictions beyond what the encoding // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index d17e2d8..e987778d 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -167,6 +167,7 @@ public: DecodeStatus convertVOPCDPPInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; + MCOperand decodeOperand_VGPR_32_Lo128(unsigned Val) const; MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 58add8c..95557cd 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -123,6 +123,10 @@ bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const { LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n"); return false; } + // Do not shrink True16 instructions pre-RA to avoid the restriction in + // register allocation from only being able to use 128 VGPRs + if (AMDGPU::isTrue16Inst(Op)) + return false; if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) { // Give up if there are any uses of the sdst in carry-out or VOPC. 
// The shrunken form of the instruction would write it to vcc instead of to @@ -601,6 +605,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { LLVM_DEBUG(dbgs() << " try: " << OrigMI); auto OrigOp = OrigMI.getOpcode(); + assert((TII->get(OrigOp).Size != 4 || !AMDGPU::isTrue16Inst(OrigOp)) && + "There should not be e32 True16 instructions pre-RA"); if (OrigOp == AMDGPU::REG_SEQUENCE) { Register FwdReg = OrigMI.getOperand(0).getReg(); unsigned FwdSubReg = 0; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 21e7554..4897f48 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -156,6 +156,8 @@ static unsigned macToMad(unsigned Opc) { return AMDGPU::V_FMA_F32_e64; case AMDGPU::V_FMAC_F16_e64: return AMDGPU::V_FMA_F16_gfx9_e64; + case AMDGPU::V_FMAC_F16_t16_e64: + return AMDGPU::V_FMA_F16_gfx9_e64; case AMDGPU::V_FMAC_LEGACY_F32_e64: return AMDGPU::V_FMA_LEGACY_F32_e64; case AMDGPU::V_FMAC_F64_e64: @@ -1297,6 +1299,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { switch (Op) { case AMDGPU::V_MAX_F32_e64: case AMDGPU::V_MAX_F16_e64: + case AMDGPU::V_MAX_F16_t16_e64: case AMDGPU::V_MAX_F64_e64: case AMDGPU::V_PK_MAX_F16: { if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm()) @@ -1391,7 +1394,8 @@ static int getOModValue(unsigned Opc, int64_t Val) { return SIOutMods::NONE; } } - case AMDGPU::V_MUL_F16_e64: { + case AMDGPU::V_MUL_F16_e64: + case AMDGPU::V_MUL_F16_t16_e64: { switch (static_cast(Val)) { case 0x3800: // 0.5 return SIOutMods::DIV2; @@ -1417,10 +1421,12 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const { switch (Op) { case AMDGPU::V_MUL_F64_e64: case AMDGPU::V_MUL_F32_e64: + case AMDGPU::V_MUL_F16_t16_e64: case AMDGPU::V_MUL_F16_e64: { // If output denormals are enabled, omod is ignored. 
if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) || - ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64) && + ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 || + Op == AMDGPU::V_MUL_F16_t16_e64) && MFI->getMode().FP64FP16OutputDenormals)) return std::make_pair(nullptr, SIOutMods::NONE); @@ -1449,10 +1455,12 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const { } case AMDGPU::V_ADD_F64_e64: case AMDGPU::V_ADD_F32_e64: - case AMDGPU::V_ADD_F16_e64: { + case AMDGPU::V_ADD_F16_e64: + case AMDGPU::V_ADD_F16_t16_e64: { // If output denormals are enabled, omod is ignored. if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) || - ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64) && + ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 || + Op == AMDGPU::V_ADD_F16_t16_e64) && MFI->getMode().FP64FP16OutputDenormals)) return std::make_pair(nullptr, SIOutMods::NONE); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6032ae8..3cc81f1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2974,7 +2974,8 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || - Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) { + Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64) { // Don't fold if we are using source or output modifiers. The new VOP2 // instructions don't have them. 
if (hasAnyModifiersSet(UseMI)) @@ -2991,8 +2992,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64; - bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || - Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64; + bool IsFMA = + Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64; MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); @@ -3006,8 +3009,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, return false; unsigned NewOpc = - IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16) - : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); + IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 + : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 + : AMDGPU::V_FMAMK_F16) + : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); if (pseudoToMCOpcode(NewOpc) == -1) return false; @@ -3024,9 +3029,8 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Src0->setSubReg(Src1SubReg); Src0->setIsKill(Src1->isKill()); - if (Opc == AMDGPU::V_MAC_F32_e64 || - Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64 || + if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); @@ -3085,8 +3089,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } unsigned NewOpc = - IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16) - : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); + IsFMA ? (IsF32 ? 
AMDGPU::V_FMAAK_F32 + : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 + : AMDGPU::V_FMAAK_F16) + : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); if (pseudoToMCOpcode(NewOpc) == -1) return false; @@ -3095,9 +3101,8 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. - if (Opc == AMDGPU::V_MAC_F32_e64 || - Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64 || + if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); @@ -3286,13 +3291,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return MIB; } + assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 && + "V_FMAC_F16_t16_e32 is not supported and not expected to be present " + "pre-RA"); + // Handle MAC/FMAC. 
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64; + Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64; bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 || @@ -3306,6 +3317,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return nullptr; case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F32_e64: @@ -3369,7 +3381,9 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, int64_t Imm; if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { unsigned NewOpc = - IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32) + IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 + : AMDGPU::V_FMAAK_F16) + : AMDGPU::V_FMAAK_F32) : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) @@ -3384,9 +3398,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return MIB; } } - unsigned NewOpc = IsFMA - ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32) - : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); + unsigned NewOpc = + IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 + : AMDGPU::V_FMAMK_F16) + : AMDGPU::V_FMAMK_F32) + : (IsF16 ? 
AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) @@ -3812,6 +3828,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI, case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F32_e64: case AMDGPU::V_FMAC_F64_e64: case AMDGPU::V_FMAC_LEGACY_F32_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 84dc0b1..cf1aa5b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1400,10 +1400,20 @@ def FPVRegInputModsMatchClass : AsmOperandClass { let PredicateMethod = "isVRegWithInputMods"; } +def FPT16VRegInputModsMatchClass : AsmOperandClass { + let Name = "T16VRegWithFPInputMods"; + let ParserMethod = "parseRegWithFPInputMods"; + let PredicateMethod = "isT16VRegWithInputMods"; +} + def FPVRegInputMods : InputMods { let PrintMethod = "printOperandAndFPInputMods"; } +def FPT16VRegInputMods : InputMods { + let PrintMethod = "printOperandAndFPInputMods"; +} + class IntSDWAInputModsMatchClass : AsmOperandClass { let Name = "SDWAWithInt"#opSize#"InputMods"; let ParserMethod = "parseRegOrImmWithIntInputMods"; @@ -1432,6 +1442,16 @@ def IntVRegInputModsMatchClass : AsmOperandClass { let PredicateMethod = "isVRegWithInputMods"; } +def IntT16VRegInputModsMatchClass : AsmOperandClass { + let Name = "T16VRegWithIntInputMods"; + let ParserMethod = "parseRegWithIntInputMods"; + let PredicateMethod = "isT16VRegWithInputMods"; +} + +def IntT16VRegInputMods : InputMods { + let PrintMethod = "printOperandAndIntInputMods"; +} + def IntVRegInputMods : InputMods { let PrintMethod = "printOperandAndIntInputMods"; } @@ -1598,6 +1618,14 @@ class getVALUDstForVT { VOPDstS64orS32)))); // else VT == i1 } +class getVALUDstForVT_t16 { + RegisterOperand ret = !if(!eq(VT.Size, 
32), VOPDstOperand, + !if(!eq(VT.Size, 128), VOPDstOperand, + !if(!eq(VT.Size, 64), VOPDstOperand, + !if(!eq(VT.Size, 16), VOPDstOperand, + VOPDstS64orS32)))); // else VT == i1 +} + // Returns the register class to use for the destination of VOP[12C] // instructions with SDWA extension class getSDWADstForVT { @@ -1608,7 +1636,7 @@ class getSDWADstForVT { // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. -class getVOPSrc0ForVT { +class getVOPSrc0ForVT { bit isFP = isFloatType.ret; RegisterOperand ret = @@ -1616,7 +1644,10 @@ class getVOPSrc0ForVT { !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Value, f16.Value), - VSrc_f16, + !if(IsTrue16, + VSrcT_f16_Lo128, + VSrc_f16 + ), !if(!eq(VT.Value, v2f16.Value), VSrc_v2f16, !if(!eq(VT.Value, v4f16.Value), @@ -1629,7 +1660,10 @@ class getVOPSrc0ForVT { !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Value, i16.Value), - VSrc_b16, + !if(IsTrue16, + VSrcT_b16_Lo128, + VSrc_b16 + ), !if(!eq(VT.Value, v2i16.Value), VSrc_v2b16, VSrc_b32 @@ -1652,6 +1686,15 @@ class getVregSrcForVT { VGPR_32)))); } +class getVregSrcForVT_t16 { + RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, + !if(!eq(VT.Size, 96), VReg_96, + !if(!eq(VT.Size, 64), VReg_64, + !if(!eq(VT.Size, 48), VReg_64, + !if(!eq(VT.Size, 16), VGPR_32_Lo128, + VGPR_32))))); +} + class getSDWASrcForVT { bit isFP = isFloatType.ret; RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); @@ -1759,6 +1802,16 @@ class getSrcModDPP { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +class getSrcModDPP_t16 { + bit isFP = isFloatType.ret; + Operand ret = + !if (isFP, + !if (!eq(VT.Value, f16.Value), FPT16VRegInputMods, + FPVRegInputMods), + !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods, + IntVRegInputMods)); +} + // Return type of input modifiers operand for specified input operand for DPP class getSrcModVOP3DPP { bit isFP = isFloatType.ret; @@ -2382,6 +2435,7 @@ class VOPProfile _ArgVT, bit 
_EnableF32SrcMods = 0, field list ArgVT = _ArgVT; field bit EnableF32SrcMods = _EnableF32SrcMods; field bit EnableClamp = _EnableClamp; + field bit IsTrue16 = 0; field ValueType DstVT = ArgVT[0]; field ValueType Src0VT = ArgVT[1]; @@ -2392,7 +2446,7 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field RegisterOperand DstRC64 = DstRC; field RegisterOperand DstRCVOP3DPP = DstRC64; field RegisterOperand DstRCSDWA = getSDWADstForVT.ret; - field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; + field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; field RegisterOperand Src1RC32 = RegisterOperand.ret>; field RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src1RC64 = getVOP3SrcForVT.ret; @@ -2411,6 +2465,8 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field Operand Src0ModDPP = getSrcModDPP.ret; field Operand Src1ModDPP = getSrcModDPP.ret; field Operand Src2ModDPP = getSrcModDPP.ret; + field Operand Src0ModVOP3DPP = getSrcModDPP.ret; + field Operand Src1ModVOP3DPP = getSrcModDPP.ret; field Operand Src2ModVOP3DPP = getSrcModVOP3DPP.ret; field Operand Src0ModSDWA = getSrcModSDWA.ret; field Operand Src1ModSDWA = getSrcModSDWA.ret; @@ -2513,7 +2569,7 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; field dag InsVOP3Base = getInsVOP3Base.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; field dag InsVOP3DPP = getInsVOP3DPP.ret; field dag InsVOP3DPP16 = getInsVOP3DPP16.ret; field dag InsVOP3DPP8 = getInsVOP3DPP8.ret; @@ -2569,7 +2625,25 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, class VOP_PAT_GEN : VOPProfile { let NeedPatGen = mode; } -def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; + +// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16, +// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this +// class, so copy changes to this class in those profiles +class VOPProfile_True16 : VOPProfile { + let 
IsTrue16 = 1; + // Most DstVT are 16-bit, but not all + let DstRC = getVALUDstForVT_t16.ret; + let DstRC64 = getVALUDstForVT.ret; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_t16.ret; + let Src1DPP = getVregSrcForVT_t16.ret; + let Src2DPP = getVregSrcForVT_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; +} + +def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>; def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index b6a2532..1972e04 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -921,62 +921,70 @@ def : GCNPat < } // End OtherPredicates = [UnsafeFPMath] -// f16_to_fp patterns -def : GCNPat < - (f32 (f16_to_fp i32:$src0)), - (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0) ->; +multiclass f16_fp_Pats { + // f16_to_fp patterns + def : GCNPat < + (f32 (f16_to_fp i32:$src0)), + (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src0) + >; -def : GCNPat < - (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))), - (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0) ->; + def : GCNPat < + (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))), + (cvt_f32_f16_inst_e64 SRCMODS.ABS, $src0) + >; -def : GCNPat < - (f32 (f16_to_fp (i32 (srl_oneuse (and_oneuse i32:$src0, 0x7fff0000), (i32 16))))), - (V_CVT_F32_F16_e64 SRCMODS.ABS, (i32 (V_LSHRREV_B32_e64 (i32 16), i32:$src0))) ->; + def : GCNPat < + (f32 (f16_to_fp (i32 (srl_oneuse (and_oneuse i32:$src0, 0x7fff0000), (i32 16))))), + (cvt_f32_f16_inst_e64 SRCMODS.ABS, (i32 (V_LSHRREV_B32_e64 (i32 16), i32:$src0))) + >; -def : GCNPat < - (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))), - (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0) ->; + def : GCNPat < + (f32 (f16_to_fp (or_oneuse 
i32:$src0, 0x8000))), + (cvt_f32_f16_inst_e64 SRCMODS.NEG_ABS, $src0) + >; -def : GCNPat < - (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))), - (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0) ->; + def : GCNPat < + (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))), + (cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0) + >; -def : GCNPat < - (f64 (fpextend f16:$src)), - (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src)) ->; + def : GCNPat < + (f64 (fpextend f16:$src)), + (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src)) + >; -// fp_to_fp16 patterns -def : GCNPat < - (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), - (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0) ->; + // fp_to_fp16 patterns + def : GCNPat < + (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), + (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0) + >; -def : GCNPat < - (i32 (fp_to_sint f16:$src)), - (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (i32 (fp_to_sint f16:$src)), + (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32:$src)) + >; -def : GCNPat < - (i32 (fp_to_uint f16:$src)), - (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (i32 (fp_to_uint f16:$src)), + (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32:$src)) + >; -def : GCNPat < - (f16 (sint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (f16 (sint_to_fp i32:$src)), + (cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_I32_e32 VSrc_b32:$src)) + >; -def : GCNPat < - (f16 (uint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 VSrc_b32:$src)) ->; + def : GCNPat < + (f16 (uint_to_fp i32:$src)), + (cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src)) + >; +} + +let SubtargetPredicate = NotHasTrue16BitInsts in +defm : f16_fp_Pats; + +let SubtargetPredicate = HasTrue16BitInsts in +defm : f16_fp_Pats; 
//===----------------------------------------------------------------------===// // VOP2 Patterns @@ -1503,7 +1511,10 @@ class ClampPat : GCNPat < def : ClampPat; def : ClampPat; +let SubtargetPredicate = NotHasTrue16BitInsts in def : ClampPat; +let SubtargetPredicate = HasTrue16BitInsts in +def : ClampPat; let SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < @@ -2268,6 +2279,7 @@ def : GCNPat < ) >; +let SubtargetPredicate = NotHasTrue16BitInsts in def : GCNPat < (f16 (sint_to_fp i1:$src)), (V_CVT_F16_F32_e32 ( @@ -2276,6 +2288,16 @@ def : GCNPat < SSrc_i1:$src)) >; +let SubtargetPredicate = HasTrue16BitInsts in +def : GCNPat < + (f16 (sint_to_fp i1:$src)), + (V_CVT_F16_F32_t16_e32 ( + V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), + /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE), + SSrc_i1:$src)) +>; + +let SubtargetPredicate = NotHasTrue16BitInsts in def : GCNPat < (f16 (uint_to_fp i1:$src)), (V_CVT_F16_F32_e32 ( @@ -2283,6 +2305,14 @@ def : GCNPat < /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE), SSrc_i1:$src)) >; +let SubtargetPredicate = HasTrue16BitInsts in +def : GCNPat < + (f16 (uint_to_fp i1:$src)), + (V_CVT_F16_F32_t16_e32 ( + V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), + /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE), + SSrc_i1:$src)) +>; def : GCNPat < (f32 (sint_to_fp i1:$src)), @@ -2501,6 +2531,8 @@ def : GCNPat< // Prefer selecting to max when legal, but using mul is always valid. 
let AddedComplexity = -5 in { + +let OtherPredicates = [NotHasTrue16BitInsts] in { def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src) @@ -2510,6 +2542,19 @@ def : GCNPat< (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))), (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src) >; +} // End OtherPredicates + +let OtherPredicates = [HasTrue16BitInsts] in { +def : GCNPat< + (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), + (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src) +>; + +def : GCNPat< + (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))), + (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src) +>; +} // End OtherPredicates def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), @@ -2552,8 +2597,13 @@ multiclass SelectCanonicalizeAsMax< def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> { - // FIXME: Should have 16-bit inst subtarget predicate - let OtherPredicates = f16_preds; + let OtherPredicates = !listconcat(f16_preds, [Has16BitInsts, NotHasTrue16BitInsts]); + } + + def : GCNPat< + (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), + (V_MAX_F16_t16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> { + let OtherPredicates = !listconcat(f16_preds, [Has16BitInsts, HasTrue16BitInsts]); } def : GCNPat< @@ -2600,9 +2650,10 @@ def : GCNPat < >; } // End OtherPredicates = [HasDLInsts] -let SubtargetPredicate = isGFX10Plus in +let SubtargetPredicate = isGFX10Plus in { // Don't allow source modifiers. If there are any source modifiers then it's // better to select fma instead of fmac. 
+let OtherPredicates = [NotHasTrue16BitInsts] in def : GCNPat < (fma (f16 (VOP3NoMods f32:$src0)), (f16 (VOP3NoMods f32:$src1)), @@ -2610,6 +2661,15 @@ def : GCNPat < (V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2) >; +let OtherPredicates = [HasTrue16BitInsts] in +def : GCNPat < + (fma (f16 (VOP3NoMods f32:$src0)), + (f16 (VOP3NoMods f32:$src1)), + (f16 (VOP3NoMods f32:$src2))), + (V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, + SRCMODS.NONE, $src2) +>; +} let SubtargetPredicate = isGFX90APlus in // Don't allow source modifiers. If there are any source modifiers then it's diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp index a5816e2..0d48c31 100644 --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -174,14 +174,36 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI, return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)); case AMDGPU::FPTRUNC_UPWARD_PSEUDO: { - // Replacing the pseudo by a real instruction - MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); + // Replacing the pseudo by a real instruction in place + if (TII->getSubtarget().hasTrue16BitInsts()) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineInstrBuilder B(*MBB.getParent(), MI); + MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64)); + MachineOperand Src0 = MI.getOperand(1); + MI.removeOperand(1); + B.addImm(0); // src0_modifiers + B.add(Src0); // re-add src0 operand + B.addImm(0); // clamp + B.addImm(0); // omod + } else + MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF)); } case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: { - // Replacing the pseudo by a real instruction - MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); + // Replacing the pseudo by a real instruction in place + if (TII->getSubtarget().hasTrue16BitInsts()) { + MachineBasicBlock &MBB = 
*MI.getParent(); + MachineInstrBuilder B(*MBB.getParent(), MI); + MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64)); + MachineOperand Src0 = MI.getOperand(1); + MI.removeOperand(1); + B.addImm(0); // src0_modifiers + B.add(Src0); // re-add src0 operand + B.addImm(0); // clamp + B.addImm(0); // omod + } else + MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF)); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 99c8f86..fae76be 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -553,6 +553,15 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types let Size = 32; let Weight = 1; } + +// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers. +def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, + (add (sequence "VGPR%u", 0, 127))> { + let AllocationPriority = 0; + let GeneratePressureSet = 0; + let Size = 32; + let Weight = 1; +} } // End HasVGPR = 1 // VGPR 64-bit registers @@ -885,6 +894,13 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, let HasSGPR = 1; } +def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; + let HasVGPR = 1; + let HasSGPR = 1; +} + def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> { let isAllocatable = 0; let HasVGPR = 1; @@ -934,6 +950,25 @@ class RegImmMatcher : AsmOperandClass { let RenderMethod = "addRegOrImmOperands"; } +// For VOP1,2,C True16 instructions. 
Uses first 128 32-bit VGPRs only +multiclass SIRegOperand16 { + let OperandNamespace = "AMDGPU" in { + def _b16_Lo128 : RegisterOperand(rc#rc_suffix#"_Lo128")> { + let OperandType = opType#"_INT16"; + let ParserMatchClass = RegImmMatcher; + let DecoderMethod = "decodeOperand_VSrc16"; + } + + def _f16_Lo128 : RegisterOperand(rc#rc_suffix#"_Lo128")> { + let OperandType = opType#"_FP16"; + let ParserMatchClass = RegImmMatcher; + let DecoderMethod = "decodeOperand_" # rc # "_16"; + } + } +} + + multiclass SIRegOperand32 { let OperandNamespace = "AMDGPU" in { @@ -1053,6 +1088,7 @@ defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ; //===----------------------------------------------------------------------===// defm VSrc : RegImmOperand<"VS", "VSrc">; +defm VSrcT : SIRegOperand16<"VS", "VSrcT", "OPERAND_REG_IMM">; def VSrc_128 : RegisterOperand { let DecoderMethod = "DecodeVS_128RegisterClass"; @@ -1063,6 +1099,17 @@ def VSrc_128 : RegisterOperand { // with FMAMK/FMAAK //===----------------------------------------------------------------------===// +multiclass SIRegOperand16_Deferred { + let OperandNamespace = "AMDGPU" in { + def _f16_Lo128_Deferred : RegisterOperand(rc#rc_suffix#"_Lo128")> { + let OperandType = opType#"_FP16_DEFERRED"; + let ParserMatchClass = RegImmMatcher; + let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred"; + } + } +} + multiclass SIRegOperand32_Deferred { let OperandNamespace = "AMDGPU" in { @@ -1081,6 +1128,7 @@ multiclass SIRegOperand32_Deferred ; +defm VSrcT : SIRegOperand16_Deferred<"VS", "VSrcT", "OPERAND_REG_IMM">; //===----------------------------------------------------------------------===// // VRegSrc_* Operands with a VGPR @@ -1113,6 +1161,9 @@ def VRegSrc_256 : RegisterOperand { def VGPRSrc_32 : RegisterOperand { let DecoderMethod = "DecodeVGPR_32RegisterClass"; } +def VGPRSrc_32_Lo128 : RegisterOperand { + let DecoderMethod = "DecodeVGPR_32RegisterClass"; +} 
//===----------------------------------------------------------------------===// // ASrc_* Operands with an AccVGPR @@ -1128,6 +1179,7 @@ def ARegSrc_32 : RegisterOperand { //===----------------------------------------------------------------------===// defm VCSrc : RegInlineOperand<"VS", "VCSrc">; +defm VCSrcT : SIRegOperand16<"VS", "VCSrcT", "OPERAND_REG_INLINE_C">; //===----------------------------------------------------------------------===// // VISrc_* Operands with a VGPR or an inline constant diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 52f308d..c54d23e 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -11,6 +11,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -40,6 +41,7 @@ public: } bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const; + bool shouldShrinkTrue16(MachineInstr &MI) const; bool isKImmOperand(const MachineOperand &Src) const; bool isKUImmOperand(const MachineOperand &Src) const; bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const; @@ -140,6 +142,23 @@ bool SIShrinkInstructions::foldImmediates(MachineInstr &MI, return false; } +/// Do not shrink the instruction if its registers are not expressible in the +/// shrunk encoding. 
+bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const { + for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (MO.isReg()) { + Register Reg = MO.getReg(); + assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink " + "True16 Instructions post-RA"); + if (AMDGPU::VGPR_32RegClass.contains(Reg) && + !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg)) + return false; + } + } + return true; +} + bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const { return isInt<16>(Src.getImm()) && !TII->isInlineConstant(*Src.getParent(), @@ -391,7 +410,8 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: - NewOpcode = AMDGPU::V_FMAAK_F16; + NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 + : AMDGPU::V_FMAAK_F16; break; } } @@ -419,7 +439,8 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: - NewOpcode = AMDGPU::V_FMAMK_F16; + NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 + : AMDGPU::V_FMAMK_F16; break; } } @@ -427,6 +448,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) return; + if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI)) + return; + if (Swap) { // Swap Src0 and Src1 by building a new instruction. 
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode), @@ -964,6 +988,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::NoVRegs)) continue; + if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) && + !shouldShrinkTrue16(MI)) + continue; + // We can shrink this instruction LLVM_DEBUG(dbgs() << "Shrinking " << MI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 1abb723..9b9c8919 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -33,11 +33,6 @@ static llvm::cl::opt llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4)); -// TODO-GFX11: Remove this when full 16-bit codegen is implemented. -static llvm::cl::opt - LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden, - llvm::cl::desc("Never use more than 128 VGPRs")); - namespace { /// \returns Bit mask for given bit \p Shift and bit \p Width. @@ -289,6 +284,11 @@ struct VOPDInfo { uint16_t OpY; }; +struct VOPTrue16Info { + uint16_t Opcode; + bool IsTrue16; +}; + #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL @@ -309,6 +309,8 @@ struct VOPDInfo { #define GET_VOPDComponentTable_IMPL #define GET_VOPDPairs_DECL #define GET_VOPDPairs_IMPL +#define GET_VOPTrue16Table_DECL +#define GET_VOPTrue16Table_IMPL #define GET_WMMAOpcode2AddrMappingTable_DECL #define GET_WMMAOpcode2AddrMappingTable_IMPL #define GET_WMMAOpcode3AddrMappingTable_DECL @@ -431,6 +433,11 @@ bool isVOPD(unsigned Opc) { return AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0X) != -1; } +bool isTrue16Inst(unsigned Opc) { + const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc); + return Info ? Info->IsTrue16 : false; +} + unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); return Info ? 
Info->Opcode3Addr : ~0u; @@ -864,15 +871,6 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { - if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs - : isGFX11Plus(*STI)) { - // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions - // such that values 128..255 no longer mean v128..v255, they mean - // v0.hi..v127.hi instead. Until the compiler understands this, it is not - // safe to use v128..v255. - // TODO-GFX11: Remove this when full 16-bit codegen is implemented. - return 128; - } if (STI->getFeatureBits().test(FeatureGFX90AInsts)) return 512; return 256; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index ceb1fcb..11a8996 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -500,6 +500,9 @@ LLVM_READONLY bool isVOPD(unsigned Opc); LLVM_READONLY +bool isTrue16Inst(unsigned Opc); + +LLVM_READONLY unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); LLVM_READONLY diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 68f38fe..5294354 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -142,7 +142,18 @@ multiclass VOP1Inst , LetDummies; foreach _ = BoolToList.ret in - def : MnemonicAlias, LetDummies; + def : MnemonicAlias, LetDummies; +} + +multiclass VOP1Inst_t16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOP1Inst; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _t16 : VOP1Inst, node>; + } } // Special profile for instructions which have clamp @@ -151,7 +162,19 @@ class VOPProfileI2F : VOPProfile<[dstVt, srcVt, untyped, untyped]> { let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); - let InsVOP3Base = (ins Src0DPP:$src0, clampmod:$clamp, omod:$omod); + let InsVOP3Base = (ins 
Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod); + let Asm64 = "$vdst, $src0$clamp$omod"; + let AsmVOP3DPPBase = Asm64; + + let HasModifiers = 0; + let HasClamp = 1; +} + +class VOPProfileI2F_True16 : + VOPProfile_True16> { + + let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); + let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod); let Asm64 = "$vdst, $src0$clamp$omod"; let AsmVOP3DPPBase = Asm64; @@ -162,6 +185,7 @@ class VOPProfileI2F : def VOP1_F64_I32 : VOPProfileI2F ; def VOP1_F32_I32 : VOPProfileI2F ; def VOP1_F16_I16 : VOPProfileI2F ; +def VOP1_F16_I16_t16 : VOPProfileI2F_True16 ; def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{ let HasExtVOP3DPP = 0; @@ -177,6 +201,9 @@ class VOP_SPECIAL_OMOD_PROF : def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; +def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16 { + let HasOMod = 1; +} //===----------------------------------------------------------------------===// // VOP1 Instructions @@ -264,10 +291,16 @@ defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>; defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>; let FPDPRounding = 1, isReMaterializable = 0 in { -defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; + let OtherPredicates = [NotHasTrue16BitInsts] in + defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; + let OtherPredicates = [HasTrue16BitInsts] in + defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16, fpround>; } // End FPDPRounding = 1, isReMaterializable = 0 +let OtherPredicates = [NotHasTrue16BitInsts] in defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; +let OtherPredicates = [HasTrue16BitInsts] in +defm 
V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16, fpextend>; let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; @@ -425,48 +458,68 @@ let SubtargetPredicate = isGFX7Plus in { } // End SubtargetPredicate = isGFX7Plus } // End isReMaterializable = 1 -let SubtargetPredicate = Has16BitInsts in { - let FPDPRounding = 1 in { +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; +} +let OtherPredicates = [HasTrue16BitInsts] in { +defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>; +defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>; +} } // End FPDPRounding = 1 // OMod clears exceptions when set in these two instructions +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>; +} +let OtherPredicates = [HasTrue16BitInsts] in { +defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>; +defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>; +} let TRANS = 1, SchedRW = [WriteTrans32] in { -defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; -defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; -defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; -defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>; -defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>; -defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; -defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; +defm V_RCP_F16 : VOP1Inst_t16 
<"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; +defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; +defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; +defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, flog2>; +defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, fexp2>; +defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; +defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } // End TRANS = 1, SchedRW = [WriteTrans32] -defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; +defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>; -defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>; -defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>; -defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>; -defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>; +} +let OtherPredicates = [HasTrue16BitInsts] in { +defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>; +} +defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>; +defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>; +defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>; +defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, frint>; let FPDPRounding = 1 in { -defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; +defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; } // End FPDPRounding = 1 -} - -let OtherPredicates = [Has16BitInsts] in { - +let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { def : GCNPat< (f32 (f16_to_fp i16:$src)), (V_CVT_F32_F16_e32 $src) >; - def : GCNPat< (i16 
(AMDGPUfp_to_f16 f32:$src)), (V_CVT_F16_F32_e32 $src) >; - +} +let OtherPredicates = [HasTrue16BitInsts] in { +def : GCNPat< + (f32 (f16_to_fp i16:$src)), + (V_CVT_F32_F16_t16_e32 $src) +>; +def : GCNPat< + (i16 (AMDGPUfp_to_f16 f32:$src)), + (V_CVT_F16_F32_t16_e32 $src) +>; } def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> { @@ -489,8 +542,14 @@ let SubtargetPredicate = isGFX9Plus in { defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>; let mayRaiseFPException = 0 in { - defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>; - defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>; + let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { + defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>; + defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>; + defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>; + } } // End mayRaiseFPException = 0 } // End SubtargetPredicate = isGFX9Plus @@ -584,9 +643,9 @@ let SubtargetPredicate = isGFX11Plus in { getVOP1Pat64.ret, /*VOP1Only=*/ 1>; - defm V_NOT_B16 : VOP1Inst<"v_not_b16", VOP_I16_I16>; - defm V_CVT_I32_I16 : VOP1Inst<"v_cvt_i32_i16", VOP_I32_I16>; - defm V_CVT_U32_U16 : VOP1Inst<"v_cvt_u32_u16", VOP_I32_I16>; + defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>; + defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>; + defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>; } // End SubtargetPredicate = isGFX11Plus //===----------------------------------------------------------------------===// @@ -650,8 +709,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { string asmName> { defvar ps = !cast(opName#"_e32"); let AsmString = asmName # 
ps.AsmOperands in { - defm NAME : VOP1_Real_e32_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + defm NAME : VOP1_Real_e32_gfx11; } } multiclass VOP1_Real_e64_gfx11 op> { @@ -669,8 +727,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { string asmName> { defvar ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in { - defm NAME : VOP1_Real_dpp_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + defm NAME : VOP1_Real_dpp_gfx11; } } multiclass VOP1_Real_dpp8_gfx11 op, string opName = NAME> { @@ -683,8 +740,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { string asmName> { defvar ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in { - defm NAME : VOP1_Real_dpp8_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + defm NAME : VOP1_Real_dpp8_gfx11; } } } // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" @@ -703,16 +759,24 @@ multiclass VOP1_Real_FULL_gfx11 op> : VOP1_Real_dpp_gfx11, VOP1_Real_dpp8_gfx11; multiclass VOP1_Real_NO_VOP3_with_name_gfx11 op, string opName, - string asmName> : - VOP1_Real_e32_with_name_gfx11, - VOP1_Real_dpp_with_name_gfx11, - VOP1_Real_dpp8_with_name_gfx11; + string asmName> { + defm NAME : VOP1_Real_e32_with_name_gfx11, + VOP1_Real_dpp_with_name_gfx11, + VOP1_Real_dpp8_with_name_gfx11; + defvar ps = !cast(opName#"_e32"); + def gfx11_alias : MnemonicAlias, + Requires<[isGFX11Plus]>; +} multiclass VOP1_Real_FULL_with_name_gfx11 op, string opName, string asmName> : VOP1_Real_NO_VOP3_with_name_gfx11, VOP1_Realtriple_e64_with_name_gfx11; +multiclass VOP1_Real_FULL_t16_gfx11 op, string asmName, + string opName = NAME> : + VOP1_Real_FULL_with_name_gfx11; + multiclass VOP1_Real_NO_DPP_gfx11 op> : VOP1_Real_e32_gfx11, VOP1_Real_e64_gfx11; @@ -727,9 +791,33 @@ defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a, defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b, "V_FFBH_I32", 
"v_cls_i32">; defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>; -defm V_NOT_B16 : VOP1_Real_FULL_gfx11<0x069>; -defm V_CVT_I32_I16 : VOP1_Real_FULL_gfx11<0x06a>; -defm V_CVT_U32_U16 : VOP1_Real_FULL_gfx11<0x06b>; +defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11<0x069, "v_not_b16">; +defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x06a, "v_cvt_i32_i16">; +defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x06b, "v_cvt_u32_u16">; + +defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x050, "v_cvt_f16_u16">; +defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x051, "v_cvt_f16_i16">; +defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x052, "v_cvt_u16_f16">; +defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x053, "v_cvt_i16_f16">; +defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x054, "v_rcp_f16">; +defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x055, "v_sqrt_f16">; +defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x056, "v_rsq_f16">; +defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x057, "v_log_f16">; +defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x058, "v_exp_f16">; +defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x059, "v_frexp_mant_f16">; +defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05a, "v_frexp_exp_i16_f16">; +defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05b, "v_floor_f16">; +defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05c, "v_ceil_f16">; +defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05d, "v_trunc_f16">; +defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05e, "v_rndne_f16">; +defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05f, "v_fract_f16">; +defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x060, "v_sin_f16">; +defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x061, "v_cos_f16">; +defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x063, "v_cvt_norm_i16_f16">; +defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x064, "v_cvt_norm_u16_f16">; + +defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11<0x00a, "v_cvt_f16_f32">; 
+defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x00b, "v_cvt_f32_f16">; //===----------------------------------------------------------------------===// // GFX10. @@ -789,27 +877,27 @@ multiclass VOP1Only_Real_gfx10_gfx11 op> : defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>; defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>; -defm V_CVT_F16_U16 : VOP1_Real_gfx10_FULL_gfx11<0x050>; -defm V_CVT_F16_I16 : VOP1_Real_gfx10_FULL_gfx11<0x051>; -defm V_CVT_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x052>; -defm V_CVT_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x053>; -defm V_RCP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x054>; -defm V_SQRT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x055>; -defm V_RSQ_F16 : VOP1_Real_gfx10_FULL_gfx11<0x056>; -defm V_LOG_F16 : VOP1_Real_gfx10_FULL_gfx11<0x057>; -defm V_EXP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x058>; -defm V_FREXP_MANT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x059>; -defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05a>; -defm V_FLOOR_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05b>; -defm V_CEIL_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05c>; -defm V_TRUNC_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05d>; -defm V_RNDNE_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05e>; -defm V_FRACT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05f>; -defm V_SIN_F16 : VOP1_Real_gfx10_FULL_gfx11<0x060>; -defm V_COS_F16 : VOP1_Real_gfx10_FULL_gfx11<0x061>; +defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; +defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; +defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; +defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; +defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; +defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>; +defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; +defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; +defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; +defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; +defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; +defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; +defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; +defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; +defm V_RNDNE_F16 : 
VOP1_Real_gfx10<0x05e>; +defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; +defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; +defm V_COS_F16 : VOP1_Real_gfx10<0x061>; defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10_FULL_gfx11<0x062>; -defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x063>; -defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x064>; +defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; +defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>; defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>; @@ -893,8 +981,8 @@ defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>; defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>; defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>; defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>; -defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00a>; -defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00b>; +defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; +defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>; @@ -1077,17 +1165,17 @@ let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in { // indexing mode. vdst can't be treated as a def for codegen purposes, // and an implicit use and def of the super register should be added. def V_MOV_B32_indirect_write : VPseudoInstSI<(outs), - (ins getVALUDstForVT.ret:$vdst, getVOPSrc0ForVT.ret:$src0)>, + (ins getVALUDstForVT.ret:$vdst, getVOPSrc0ForVT.ret:$src0)>, PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT.ret:$vdst, - getVOPSrc0ForVT.ret:$src0)>; + getVOPSrc0ForVT.ret:$src0)>; // Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the // super register should be added. 
def V_MOV_B32_indirect_read : VPseudoInstSI< (outs getVALUDstForVT.ret:$vdst), - (ins getVOPSrc0ForVT.ret:$src0)>, + (ins getVOPSrc0ForVT.ret:$src0)>, PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT.ret:$vdst, - getVOPSrc0ForVT.ret:$src0)>; + getVOPSrc0ForVT.ret:$src0)>; } // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [M0] diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index b24857ed..5710e06 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -186,6 +186,36 @@ multiclass VOP2Inst { + let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in { + defm NAME : VOP2Inst; + } + let SubtargetPredicate = HasTrue16BitInsts in { + defm _t16 : VOP2Inst, node, revOp#"_t16", GFX9Renamed>; + } +} + +// Creating a _t16_e32 pseudo when there is no corresponding real instruction on +// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we +// assume means the instruction is already a real. 
The fix is to not create that +// _t16_e32 pseudo +multiclass VOP2Inst_e64_t16 { + let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in { + defm NAME : VOP2Inst; + } + let SubtargetPredicate = HasTrue16BitInsts in { + defm _t16 : VOP2Inst_e64, node, revOp#"_t16", GFX9Renamed>; + } +} + multiclass VOP2Inst_VOPD VOPDOp, @@ -341,11 +371,18 @@ class VOP_MADAK : VOP_MADK_Base { } def VOP_MADAK_F16 : VOP_MADAK ; +def VOP_MADAK_F16_t16 : VOP_MADAK { + let IsTrue16 = 1; + let DstRC = VOPDstOperand; + let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm); +} def VOP_MADAK_F32 : VOP_MADAK ; class VOP_MADMK : VOP_MADK_Base { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); - field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1); + field dag Ins32 = !if(!eq(vt.Size, 32), + (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1), + (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1)); field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X); let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X); field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y); @@ -359,6 +396,11 @@ class VOP_MADMK : VOP_MADK_Base { } def VOP_MADMK_F16 : VOP_MADMK ; +def VOP_MADMK_F16_t16 : VOP_MADMK { + let IsTrue16 = 1; + let DstRC = VOPDstOperand; + let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1); +} def VOP_MADMK_F32 : VOP_MADMK ; class getRegisterOperandForVT { @@ -411,6 +453,28 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v } def VOP_MAC_F16 : VOP_MAC ; +def VOP_MAC_F16_t16 : VOP_MAC { + let IsTrue16 = 1; + let DstRC = VOPDstOperand; + let DstRC64 = VOPDstOperand; + let Src1RC32 = VGPRSrc_32_Lo128; + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT_t16.ret:$src2); + let Src0DPP = getVregSrcForVT_t16.ret; + let Src1DPP = 
getVregSrcForVT_t16.ret; + let Src2DPP = getVregSrcForVT_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + Src1ModDPP:$src1_modifiers, Src1DPP:$src1, + getVregSrcForVT_t16.ret:$src2, // stub argument + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + Src1ModDPP:$src1_modifiers, Src1DPP:$src1, + getVregSrcForVT_t16.ret:$src2, // stub argument + dpp8:$dpp8, FI:$fi); +} def VOP_MAC_F32 : VOP_MAC ; let HasExtDPP = 0, HasExt32BitDPP = 0 in def VOP_MAC_LEGACY_F32 : VOP_MAC ; @@ -752,39 +816,82 @@ def : divergent_i64_BinOp ; def : divergent_i64_BinOp ; def : divergent_i64_BinOp ; +//===----------------------------------------------------------------------===// +// 16-Bit Operand Instructions +//===----------------------------------------------------------------------===// -let SubtargetPredicate = Has16BitInsts in { let isReMaterializable = 1 in { let FPDPRounding = 1 in { -def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; -defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; +defm V_LDEXP_F16 : VOP2Inst_t16 <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; } // End FPDPRounding = 1 - -defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; -defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; -defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; - +// FIXME VOP3 Only instructions. 
NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions +defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; +defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; +defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; let isCommutable = 1 in { let FPDPRounding = 1 in { -defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>; -defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; -defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; -defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; +defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>; +defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; +defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; +defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; +} // End FPDPRounding = 1 +defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; +defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; +defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; +defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>; +defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>; +defm V_MIN_U16 : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>; +defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>; +} // End isCommutable = 1 +} // End isReMaterializable = 1 -let mayRaiseFPException = 0 in { -def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; +let SubtargetPredicate = isGFX11Plus in { + let isCommutable = 1 in { + defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16, and>; + defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16, or>; +
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16, xor>; + } // End isCommutable = 1 +} // End SubtargetPredicate = isGFX11Plus + +let FPDPRounding = 1, isReMaterializable = 1 in { +let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in { +def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; +} +let SubtargetPredicate = HasTrue16BitInsts in { +def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">; } -} // End FPDPRounding = 1 +let isCommutable = 1 in { +let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in { +def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; +} +let SubtargetPredicate = HasTrue16BitInsts in { +def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">; +} +} // End isCommutable = 1 +} // End FPDPRounding = 1, isReMaterializable = 1 -defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; -defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; -defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; -defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>; -defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>; -defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>; -defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>; +let Constraints = "$vdst = $src2", + DisableEncoding="$src2", + isConvertibleToThreeAddress = 1, + isCommutable = 1 in { +let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in { +defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; +} +let SubtargetPredicate = HasTrue16BitInsts in { +defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>; +} +} // End FMAC Constraints +let SubtargetPredicate = Has16BitInsts in { +let isReMaterializable = 1 in { +let FPDPRounding = 1 in { +def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; +} // End 
FPDPRounding = 1 +let isCommutable = 1 in { +let mayRaiseFPException = 0 in { +def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; +} let SubtargetPredicate = isGFX8GFX9 in { defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; @@ -800,6 +907,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; } } // End SubtargetPredicate = Has16BitInsts + let SubtargetPredicate = HasDLInsts in { let isReMaterializable = 1 in @@ -840,7 +948,6 @@ let Constraints = "$vdst = $src2", isConvertibleToThreeAddress = 1, isCommutable = 1 in defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">; - } // End SubtargetPredicate = HasDLInsts let SubtargetPredicate = HasFmaLegacy32 in { @@ -911,24 +1018,6 @@ let isCommutable = 1 in def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; } -let SubtargetPredicate = isGFX10Plus in { - -let FPDPRounding = 1, isReMaterializable = 1 in { -def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; - -let isCommutable = 1 in -def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; -} // End FPDPRounding = 1, isReMaterializable = 1 - -let Constraints = "$vdst = $src2", - DisableEncoding="$src2", - isConvertibleToThreeAddress = 1, - isCommutable = 1 in { -defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; -} - -} // End SubtargetPredicate = isGFX10Plus - let SubtargetPredicate = HasPkFmacF16Inst in { defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; } // End SubtargetPredicate = HasPkFmacF16Inst @@ -1034,14 +1123,6 @@ def : VOPBinOpClampPat; def : VOPBinOpClampPat; } -let SubtargetPredicate = isGFX11Plus in { - let isCommutable = 1 in { - defm V_AND_B16 : VOP2Inst <"v_and_b16", VOP_I16_I16_I16, and>; - defm V_OR_B16 : VOP2Inst <"v_or_b16", VOP_I16_I16_I16, or>; - defm V_XOR_B16 : VOP2Inst <"v_xor_b16", VOP_I16_I16_I16, 
xor>; - } // End isCommutable = 1 -} // End SubtargetPredicate = isGFX11Plus - //===----------------------------------------------------------------------===// // DPP Encodings //===----------------------------------------------------------------------===// @@ -1108,6 +1189,15 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { VOP2_Real(NAME), SIEncodingFamily.GFX11>, VOP2_MADKe(NAME).Pfl>; } + multiclass VOP2Only_Real_MADK_gfx11_with_name op, string asmName, + string opName = NAME> { + def _gfx11 : + VOP2_Real(opName), SIEncodingFamily.GFX11>, + VOP2_MADKe(opName).Pfl> { + VOP2_Pseudo ps = !cast(opName); + let AsmString = asmName # ps.AsmOperands; + } + } multiclass VOP2_Real_e32_gfx11 op> { def _e32_gfx11 : VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX11>, @@ -1141,8 +1231,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { defvar ps = !cast(opName#"_e32"); def _e32_gfx11 : VOP2_Real, - VOP2e, - MnemonicAlias, Requires<[isGFX11Plus]> { + VOP2e { let AsmString = asmName # ps.AsmOperands; let IsSingle = single; } @@ -1152,8 +1241,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { defvar ps = !cast(opName#"_e64"); def _e64_gfx11 : VOP3_Real, - VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl>, - MnemonicAlias, Requires<[isGFX11Plus]> { + VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl> { let AsmString = asmName # ps.AsmOperands; } } @@ -1274,23 +1362,33 @@ multiclass VOP2_Real_FULL_gfx11 op> : VOP2_Realtriple_e64_gfx11, VOP2_Real_NO_VOP3_gfx11; multiclass VOP2_Real_NO_VOP3_with_name_gfx11 op, string opName, - string asmName, bit isSingle = 0> : - VOP2_Real_e32_with_name_gfx11, + string asmName, bit isSingle = 0> { + + defm NAME : VOP2_Real_e32_with_name_gfx11, VOP2_Real_dpp_with_name_gfx11, VOP2_Real_dpp8_with_name_gfx11; + defvar ps = !cast(opName#"_e32"); + def _gfx11_alias : MnemonicAlias, Requires<[isGFX11Plus]>; +} multiclass VOP2_Real_FULL_with_name_gfx11 op, string opName, string asmName> : 
VOP2_Realtriple_e64_with_name_gfx11, VOP2_Real_NO_VOP3_with_name_gfx11; +multiclass VOP2_Real_FULL_t16_gfx11 op, string asmName, string opName = NAME> + : VOP2_Real_FULL_with_name_gfx11; + multiclass VOP2_Real_NO_DPP_gfx11 op> : VOP2_Real_e32_gfx11, VOP2_Real_e64_gfx11; multiclass VOP2_Real_NO_DPP_with_name_gfx11 op, string opName, - string asmName> : - VOP2_Real_e32_with_name_gfx11, - VOP2_Real_e64_with_name_gfx11; + string asmName> { + defm NAME : VOP2_Real_e32_with_name_gfx11, + VOP2_Real_e64_with_name_gfx11; + defvar ps = !cast(opName#"_e32"); + def _gfx11_alias : MnemonicAlias, Requires<[isGFX11Plus]>; +} defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; @@ -1314,6 +1412,17 @@ defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f, "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">; defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>; +defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">; +defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">; +defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">; +defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">; +defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">; +defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">; +defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">; +defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">; +defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">; +defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">; + // VOP3 only. 
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>; defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>; @@ -1604,16 +1713,16 @@ defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11<0x01e>; defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>; defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>; defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>; -defm V_ADD_F16 : VOP2_Real_gfx10_gfx11<0x032>; -defm V_SUB_F16 : VOP2_Real_gfx10_gfx11<0x033>; -defm V_SUBREV_F16 : VOP2_Real_gfx10_gfx11<0x034>; -defm V_MUL_F16 : VOP2_Real_gfx10_gfx11<0x035>; -defm V_FMAC_F16 : VOP2_Real_gfx10_gfx11<0x036>; -defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x037>; -defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x038>; -defm V_MAX_F16 : VOP2_Real_gfx10_gfx11<0x039>; -defm V_MIN_F16 : VOP2_Real_gfx10_gfx11<0x03a>; -defm V_LDEXP_F16 : VOP2_Real_gfx10_gfx11<0x03b>; +defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; +defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; +defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; +defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; +defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; +defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; +defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; +defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; +defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; +defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; let IsSingle = 1 in { defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index c98a7f1..bb2b918 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -767,8 +767,8 @@ class VOP3_DOT_Profile : VOP let HasSrc0Mods = 1; let HasSrc1Mods = 1; let HasSrc2Mods = 1; - let Src0ModDPP = FPVRegInputMods; - let Src1ModDPP = FPVRegInputMods; + let Src0ModVOP3DPP = FPVRegInputMods; + let Src1ModVOP3DPP = FPVRegInputMods; let Src2ModVOP3DPP = FP16InputMods; let InsVOP3OpSel = getInsVOP3OpSel; defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11<0x303>; defm 
V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11<0x304>; -defm V_MUL_LO_U16 : VOP3Only_Realtriple_gfx11<0x305>; +defm V_MUL_LO_U16_t16 : VOP3Only_Realtriple_t16_gfx11<0x305, "v_mul_lo_u16">; defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11<0x306>; defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11<0x307>; -defm V_MAX_U16 : VOP3Only_Realtriple_gfx11<0x309>; -defm V_MAX_I16 : VOP3Only_Realtriple_gfx11<0x30a>; -defm V_MIN_U16 : VOP3Only_Realtriple_gfx11<0x30b>; -defm V_MIN_I16 : VOP3Only_Realtriple_gfx11<0x30c>; +defm V_MAX_U16_t16 : VOP3Only_Realtriple_t16_gfx11<0x309, "v_max_u16">; +defm V_MAX_I16_t16 : VOP3Only_Realtriple_t16_gfx11<0x30a, "v_max_i16">; +defm V_MIN_U16_t16 : VOP3Only_Realtriple_t16_gfx11<0x30b, "v_min_u16">; +defm V_MIN_I16_t16 : VOP3Only_Realtriple_t16_gfx11<0x30c, "v_min_i16">; defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30d, "V_ADD_I16", "v_add_nc_i16">; defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30e, "V_SUB_I16", "v_sub_nc_i16">; defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11<0x311>; @@ -945,9 +945,9 @@ defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11<0x32c>; defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11<0x32d>; defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11<0x32e>; defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11<0x32f>; -defm V_LSHLREV_B16 : VOP3Only_Realtriple_gfx11<0x338>; -defm V_LSHRREV_B16 : VOP3Only_Realtriple_gfx11<0x339>; -defm V_ASHRREV_I16 : VOP3Only_Realtriple_gfx11<0x33a>; +defm V_LSHLREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x338, "v_lshlrev_b16">; +defm V_LSHRREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x339, "v_lshrrev_b16">; +defm V_ASHRREV_I16_t16 : VOP3Only_Realtriple_t16_gfx11<0x33a, "v_ashrrev_i16">; defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>; defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11<0x33d>; defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11<0x33e>; @@ -955,9 +955,9 @@ defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11<0x360>; // Pseudo in VOP2 let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { defm 
V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11<0x361>; // Pseudo in VOP2 } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) -defm V_AND_B16 : VOP3Only_Realtriple_gfx11<0x362>; -defm V_OR_B16 : VOP3Only_Realtriple_gfx11<0x363>; -defm V_XOR_B16 : VOP3Only_Realtriple_gfx11<0x364>; +defm V_AND_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x362, "v_and_b16">; +defm V_OR_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x363, "v_or_b16">; +defm V_XOR_B16_t16 : VOP3Only_Realtriple_t16_gfx11<0x364, "v_xor_b16">; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 5973d32..13f5bed 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -84,6 +84,20 @@ class VOPC_Profile sched, ValueType vt0, ValueType vt1 = vt list Schedule = sched; } +multiclass VOPC_Profile_t16 sched, ValueType vt0, ValueType vt1 = vt0> { + def NAME : VOPC_Profile; + def _t16 : VOPC_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_t16.ret; + let Src1DPP = getVregSrcForVT_t16.ret; + let Src2DPP = getVregSrcForVT_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + } +} + class VOPC_NoSdst_Profile sched, ValueType vt0, ValueType vt1 = vt0> : VOPC_Profile { @@ -101,6 +115,20 @@ class VOPC_NoSdst_Profile sched, ValueType vt0, let EmitDst = 0; } +multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, ValueType vt1 = vt0> { + def NAME : VOPC_NoSdst_Profile; + def _t16 : VOPC_NoSdst_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_t16.ret; + let Src1DPP = getVregSrcForVT_t16.ret; + let Src2DPP = getVregSrcForVT_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP 
= getSrcModDPP_t16.ret; + } +} + class VOPC_Pseudo pattern=[], bit DefVcc = 1> : InstSI<(outs), P.Ins32, "", pattern>, @@ -197,30 +225,30 @@ class VOPCInstAlias { +multiclass VOPCInstAliases { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), !cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; let WaveSizePredicate = isWave32 in { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), "vcc_lo, "#!cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; } let WaveSizePredicate = isWave64 in { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), "vcc, "#!cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; } } -multiclass VOPCXInstAliases { +multiclass VOPCXInstAliases { def : VOPCInstAlias (old_name#"_e64"), !cast(real_name#"_e32_"#Arch), !cast(old_name#"_e64").Pfl.Asm32, - real_name>; + mnemonic_from>; } class getVOPCPat64 : LetDummies { @@ -363,23 +391,29 @@ multiclass VOPCX_Pseudos ; +defm VOPC_I1_F16_F16 : VOPC_Profile_t16<[Write32Bit], f16>; def VOPC_I1_F32_F32 : VOPC_Profile<[Write32Bit], f32>; def VOPC_I1_F64_F64 : VOPC_Profile<[WriteDoubleAdd], f64>; -def VOPC_I1_I16_I16 : VOPC_Profile<[Write32Bit], i16>; +defm VOPC_I1_I16_I16 : VOPC_Profile_t16<[Write32Bit], i16>; def VOPC_I1_I32_I32 : VOPC_Profile<[Write32Bit], i32>; def VOPC_I1_I64_I64 : VOPC_Profile<[Write64Bit], i64>; -def VOPC_F16_F16 : VOPC_NoSdst_Profile<[Write32Bit], f16>; +defm VOPC_F16_F16 : VOPC_NoSdst_Profile_t16<[Write32Bit], f16>; def VOPC_F32_F32 : VOPC_NoSdst_Profile<[Write32Bit], f32>; def VOPC_F64_F64 : VOPC_NoSdst_Profile<[Write64Bit], f64>; -def VOPC_I16_I16 : VOPC_NoSdst_Profile<[Write32Bit], i16>; +defm VOPC_I16_I16 : VOPC_NoSdst_Profile_t16<[Write32Bit], i16>; def VOPC_I32_I32 : VOPC_NoSdst_Profile<[Write32Bit], i32>; def VOPC_I64_I64 : VOPC_NoSdst_Profile<[Write64Bit], i64>; multiclass VOPC_F16 : - VOPC_Pseudos ; + string revOp = opName> { + let OtherPredicates = [NotHasTrue16BitInsts, 
Has16BitInsts] in { + defm NAME : VOPC_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _t16 : VOPC_Pseudos ; + } +} multiclass VOPC_F32 : VOPC_Pseudos ; @@ -387,8 +421,15 @@ multiclass VOPC_F32 : VOPC_Pseudos ; -multiclass VOPC_I16 : - VOPC_Pseudos ; +multiclass VOPC_I16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPC_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _t16 : VOPC_Pseudos ; + } +} multiclass VOPC_I32 : VOPC_Pseudos ; @@ -396,8 +437,14 @@ multiclass VOPC_I32 : VOPC_Pseudos ; -multiclass VOPCX_F16 : - VOPCX_Pseudos ; +multiclass VOPCX_F16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPCX_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _t16 : VOPCX_Pseudos ; + } +} multiclass VOPCX_F32 : VOPCX_Pseudos ; @@ -405,8 +452,14 @@ multiclass VOPCX_F32 : multiclass VOPCX_F64 : VOPCX_Pseudos ; -multiclass VOPCX_I16 : - VOPCX_Pseudos ; +multiclass VOPCX_I16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPCX_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _t16 : VOPCX_Pseudos ; + } +} multiclass VOPCX_I32 : VOPCX_Pseudos ; @@ -709,11 +762,11 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">; // Class instructions //===----------------------------------------------------------------------===// -class VOPC_Class_Profile sched, ValueType vt> : - VOPC_Profile { +class VOPC_Class_Profile sched, ValueType src0VT, ValueType src1VT = i32> : + VOPC_Profile { let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP16 = AsmDPP#"$fi"; - let InsDPP = (ins FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, 
bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); // DPP8 forbids modifiers and can inherit from VOPC_Profile @@ -734,8 +787,22 @@ class VOPC_Class_Profile sched, ValueType vt> : let HasOMod = 0; } -class VOPC_Class_NoSdst_Profile sched, ValueType vt> : - VOPC_Class_Profile { +multiclass VOPC_Class_Profile_t16 sched> { + def NAME : VOPC_Class_Profile; + def _t16 : VOPC_Class_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_t16.ret; + let Src1DPP = getVregSrcForVT_t16.ret; + let Src2DPP = getVregSrcForVT_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + } +} + +class VOPC_Class_NoSdst_Profile sched, ValueType src0VT, ValueType src1VT = i32> : + VOPC_Class_Profile { let Outs64 = (outs ); let OutsSDWA = (outs ); let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, @@ -747,14 +814,29 @@ class VOPC_Class_NoSdst_Profile sched, ValueType vt> : let EmitDst = 0; } +multiclass VOPC_Class_NoSdst_Profile_t16 sched> { + def NAME : VOPC_Class_NoSdst_Profile; + def _t16 : VOPC_Class_NoSdst_Profile { + let IsTrue16 = 1; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_t16.ret; + let Src1DPP = getVregSrcForVT_t16.ret; + let Src2DPP = getVregSrcForVT_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + } +} + class getVOPCClassPat64 { list ret = [(set i1:$sdst, (AMDGPUfp_class (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers)), - P.Src1VT:$src1))]; + i32:$src1))]; } + // Special case for class instructions which only have modifiers on // the 1st source operand. 
multiclass VOPC_Class_Pseudos ; +defm VOPC_I1_F16_I16 : VOPC_Class_Profile_t16<[Write32Bit]>; def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>; def VOPC_I1_F64_I32 : VOPC_Class_Profile<[WriteDoubleAdd], f64>; -def VOPC_F16_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f16>; +defm VOPC_F16_I16 : VOPC_Class_NoSdst_Profile_t16<[Write32Bit]>; def VOPC_F32_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f32>; def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>; -multiclass VOPC_CLASS_F16 : - VOPC_Class_Pseudos ; +multiclass VOPC_CLASS_F16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPC_Class_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _t16 : VOPC_Class_Pseudos ; + } +} -multiclass VOPCX_CLASS_F16 : - VOPCX_Class_Pseudos ; +multiclass VOPCX_CLASS_F16 { + let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { + defm NAME : VOPCX_Class_Pseudos ; + } + let OtherPredicates = [HasTrue16BitInsts] in { + defm _t16 : VOPCX_Class_Pseudos ; + } +} multiclass VOPC_CLASS_F32 : VOPC_Class_Pseudos ; @@ -882,10 +976,8 @@ defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">; defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">; -let SubtargetPredicate = Has16BitInsts in { defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; -} } // End ReadsModeReg = 0, mayRaiseFPException = 0 //===----------------------------------------------------------------------===// @@ -1265,7 +1357,7 @@ let AssemblerPredicate = isGFX11Only in { } multiclass VOPC_Real_with_name_gfx11 op, string OpName, - string asm_name> { + string asm_name, string pseudo_mnemonic = ""> { defvar ps32 = !cast(OpName#"_e32"); defvar ps64 = !cast(OpName#"_e64"); let DecoderNamespace = "GFX11" in { @@ -1276,11 +1368,17 @@ let AssemblerPredicate = isGFX11Only in { // the destination-less 32bit forms 
add it to the asmString here. VOPC_Real, VOPCe, - MnemonicAlias, Requires<[isGFX11Plus]>; + MnemonicAlias, + Requires<[isGFX11Plus]>; def _e64_gfx11 : VOP3_Real, VOP3a_gfx11<{0, op}, ps64.Pfl>, - MnemonicAlias, Requires<[isGFX11Plus]> { + MnemonicAlias, + Requires<[isGFX11Plus]> { // Encoding used for VOPC instructions encoded as VOP3 differs from // VOP3e by destination name (sdst) as VOPC doesn't have vector dst. bits<8> sdst; @@ -1288,7 +1386,7 @@ let AssemblerPredicate = isGFX11Only in { } } // End DecoderNamespace = "GFX11" - defm : VOPCInstAliases; + defm : VOPCInstAliases; foreach _ = BoolToList.ret in { defvar psDPP = !cast(OpName #"_e32" #"_dpp"); @@ -1363,9 +1461,11 @@ let AssemblerPredicate = isGFX11Only in { } } } - } + multiclass VOPC_Real_t16_gfx11 op, string asm_name, + string OpName = NAME> : VOPC_Real_with_name_gfx11; + multiclass VOPCX_Real_gfx11 op> { defvar ps32 = !cast(NAME#"_nosdst_e32"); defvar ps64 = !cast(NAME#"_nosdst_e64"); @@ -1426,20 +1526,24 @@ let AssemblerPredicate = isGFX11Only in { } multiclass VOPCX_Real_with_name_gfx11 op, string OpName, - string asm_name> { + string asm_name, string pseudo_mnemonic = ""> { defvar ps32 = !cast(OpName#"_nosdst_e32"); defvar ps64 = !cast(OpName#"_nosdst_e64"); let DecoderNamespace = "GFX11" in { def _e32_gfx11 : VOPC_Real, - MnemonicAlias, + MnemonicAlias, Requires<[isGFX11Plus]>, VOPCe { let AsmString = asm_name # "{_e32} " # ps32.AsmOperands; } def _e64_gfx11 : VOP3_Real, - MnemonicAlias, + MnemonicAlias, Requires<[isGFX11Plus]>, VOP3a_gfx11<{0, op}, ps64.Pfl> { let Inst{7-0} = ? 
; // sdst @@ -1447,7 +1551,7 @@ let AssemblerPredicate = isGFX11Only in { } } // End DecoderNamespace = "GFX11" - defm : VOPCXInstAliases; + defm : VOPCXInstAliases; foreach _ = BoolToList.ret in { defvar psDPP = !cast(OpName#"_nosdst_e32"#"_dpp"); @@ -1476,26 +1580,30 @@ let AssemblerPredicate = isGFX11Only in { } } } - } + + multiclass VOPCX_Real_t16_gfx11 op, string asm_name, + string OpName = NAME> : VOPCX_Real_with_name_gfx11; + + } // End AssemblerPredicate = isGFX11Only -defm V_CMP_F_F16 : VOPC_Real_gfx11<0x000>; -defm V_CMP_LT_F16 : VOPC_Real_gfx11<0x001>; -defm V_CMP_EQ_F16 : VOPC_Real_gfx11<0x002>; -defm V_CMP_LE_F16 : VOPC_Real_gfx11<0x003>; -defm V_CMP_GT_F16 : VOPC_Real_gfx11<0x004>; -defm V_CMP_LG_F16 : VOPC_Real_gfx11<0x005>; -defm V_CMP_GE_F16 : VOPC_Real_gfx11<0x006>; -defm V_CMP_O_F16 : VOPC_Real_gfx11<0x007>; -defm V_CMP_U_F16 : VOPC_Real_gfx11<0x008>; -defm V_CMP_NGE_F16 : VOPC_Real_gfx11<0x009>; -defm V_CMP_NLG_F16 : VOPC_Real_gfx11<0x00a>; -defm V_CMP_NGT_F16 : VOPC_Real_gfx11<0x00b>; -defm V_CMP_NLE_F16 : VOPC_Real_gfx11<0x00c>; -defm V_CMP_NEQ_F16 : VOPC_Real_gfx11<0x00d>; -defm V_CMP_NLT_F16 : VOPC_Real_gfx11<0x00e>; -defm V_CMP_T_F16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16", "v_cmp_t_f16">; +defm V_CMP_F_F16_t16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">; +defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11<0x001, "v_cmp_lt_f16">; +defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11<0x002, "v_cmp_eq_f16">; +defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11<0x003, "v_cmp_le_f16">; +defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11<0x004, "v_cmp_gt_f16">; +defm V_CMP_LG_F16_t16 : VOPC_Real_t16_gfx11<0x005, "v_cmp_lg_f16">; +defm V_CMP_GE_F16_t16 : VOPC_Real_t16_gfx11<0x006, "v_cmp_ge_f16">; +defm V_CMP_O_F16_t16 : VOPC_Real_t16_gfx11<0x007, "v_cmp_o_f16">; +defm V_CMP_U_F16_t16 : VOPC_Real_t16_gfx11<0x008, "v_cmp_u_f16">; +defm V_CMP_NGE_F16_t16 : VOPC_Real_t16_gfx11<0x009, "v_cmp_nge_f16">; +defm V_CMP_NLG_F16_t16 : VOPC_Real_t16_gfx11<0x00a, 
"v_cmp_nlg_f16">; +defm V_CMP_NGT_F16_t16 : VOPC_Real_t16_gfx11<0x00b, "v_cmp_ngt_f16">; +defm V_CMP_NLE_F16_t16 : VOPC_Real_t16_gfx11<0x00c, "v_cmp_nle_f16">; +defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11<0x00d, "v_cmp_neq_f16">; +defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11<0x00e, "v_cmp_nlt_f16">; +defm V_CMP_T_F16_t16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16_t16", "v_cmp_t_f16", "v_cmp_tru_f16">; defm V_CMP_F_F32 : VOPC_Real_gfx11<0x010>; defm V_CMP_LT_F32 : VOPC_Real_gfx11<0x011>; defm V_CMP_EQ_F32 : VOPC_Real_gfx11<0x012>; @@ -1513,18 +1621,18 @@ defm V_CMP_NEQ_F32 : VOPC_Real_gfx11<0x01d>; defm V_CMP_NLT_F32 : VOPC_Real_gfx11<0x01e>; defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">; defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">; -defm V_CMP_LT_I16 : VOPC_Real_gfx11<0x031>; -defm V_CMP_EQ_I16 : VOPC_Real_gfx11<0x032>; -defm V_CMP_LE_I16 : VOPC_Real_gfx11<0x033>; -defm V_CMP_GT_I16 : VOPC_Real_gfx11<0x034>; -defm V_CMP_NE_I16 : VOPC_Real_gfx11<0x035>; -defm V_CMP_GE_I16 : VOPC_Real_gfx11<0x036>; -defm V_CMP_LT_U16 : VOPC_Real_gfx11<0x039>; -defm V_CMP_EQ_U16 : VOPC_Real_gfx11<0x03a>; -defm V_CMP_LE_U16 : VOPC_Real_gfx11<0x03b>; -defm V_CMP_GT_U16 : VOPC_Real_gfx11<0x03c>; -defm V_CMP_NE_U16 : VOPC_Real_gfx11<0x03d>; -defm V_CMP_GE_U16 : VOPC_Real_gfx11<0x03e>; +defm V_CMP_LT_I16_t16 : VOPC_Real_t16_gfx11<0x031, "v_cmp_lt_i16">; +defm V_CMP_EQ_I16_t16 : VOPC_Real_t16_gfx11<0x032, "v_cmp_eq_i16">; +defm V_CMP_LE_I16_t16 : VOPC_Real_t16_gfx11<0x033, "v_cmp_le_i16">; +defm V_CMP_GT_I16_t16 : VOPC_Real_t16_gfx11<0x034, "v_cmp_gt_i16">; +defm V_CMP_NE_I16_t16 : VOPC_Real_t16_gfx11<0x035, "v_cmp_ne_i16">; +defm V_CMP_GE_I16_t16 : VOPC_Real_t16_gfx11<0x036, "v_cmp_ge_i16">; +defm V_CMP_LT_U16_t16 : VOPC_Real_t16_gfx11<0x039, "v_cmp_lt_u16">; +defm V_CMP_EQ_U16_t16 : VOPC_Real_t16_gfx11<0x03a, "v_cmp_eq_u16">; +defm V_CMP_LE_U16_t16 : VOPC_Real_t16_gfx11<0x03b, "v_cmp_le_u16">; +defm 
V_CMP_GT_U16_t16 : VOPC_Real_t16_gfx11<0x03c, "v_cmp_gt_u16">; +defm V_CMP_NE_U16_t16 : VOPC_Real_t16_gfx11<0x03d, "v_cmp_ne_u16">; +defm V_CMP_GE_U16_t16 : VOPC_Real_t16_gfx11<0x03e, "v_cmp_ge_u16">; defm V_CMP_F_I32 : VOPC_Real_gfx11<0x040>; defm V_CMP_LT_I32 : VOPC_Real_gfx11<0x041>; defm V_CMP_EQ_I32 : VOPC_Real_gfx11<0x042>; @@ -1559,26 +1667,26 @@ defm V_CMP_NE_U64 : VOPC_Real_gfx11<0x05d>; defm V_CMP_GE_U64 : VOPC_Real_gfx11<0x05e>; defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>; -defm V_CMP_CLASS_F16 : VOPC_Real_gfx11<0x07d>; +defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11<0x07d, "v_cmp_class_f16">; defm V_CMP_CLASS_F32 : VOPC_Real_gfx11<0x07e>; defm V_CMP_CLASS_F64 : VOPC_Real_gfx11<0x07f>; -defm V_CMPX_F_F16 : VOPCX_Real_gfx11<0x080>; -defm V_CMPX_LT_F16 : VOPCX_Real_gfx11<0x081>; -defm V_CMPX_EQ_F16 : VOPCX_Real_gfx11<0x082>; -defm V_CMPX_LE_F16 : VOPCX_Real_gfx11<0x083>; -defm V_CMPX_GT_F16 : VOPCX_Real_gfx11<0x084>; -defm V_CMPX_LG_F16 : VOPCX_Real_gfx11<0x085>; -defm V_CMPX_GE_F16 : VOPCX_Real_gfx11<0x086>; -defm V_CMPX_O_F16 : VOPCX_Real_gfx11<0x087>; -defm V_CMPX_U_F16 : VOPCX_Real_gfx11<0x088>; -defm V_CMPX_NGE_F16 : VOPCX_Real_gfx11<0x089>; -defm V_CMPX_NLG_F16 : VOPCX_Real_gfx11<0x08a>; -defm V_CMPX_NGT_F16 : VOPCX_Real_gfx11<0x08b>; -defm V_CMPX_NLE_F16 : VOPCX_Real_gfx11<0x08c>; -defm V_CMPX_NEQ_F16 : VOPCX_Real_gfx11<0x08d>; -defm V_CMPX_NLT_F16 : VOPCX_Real_gfx11<0x08e>; -defm V_CMPX_T_F16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16", "v_cmpx_t_f16">; +defm V_CMPX_F_F16_t16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">; +defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11<0x081, "v_cmpx_lt_f16">; +defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11<0x082, "v_cmpx_eq_f16">; +defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11<0x083, "v_cmpx_le_f16">; +defm V_CMPX_GT_F16_t16 : VOPCX_Real_t16_gfx11<0x084, "v_cmpx_gt_f16">; +defm V_CMPX_LG_F16_t16 : VOPCX_Real_t16_gfx11<0x085, "v_cmpx_lg_f16">; +defm V_CMPX_GE_F16_t16 : VOPCX_Real_t16_gfx11<0x086, 
"v_cmpx_ge_f16">; +defm V_CMPX_O_F16_t16 : VOPCX_Real_t16_gfx11<0x087, "v_cmpx_o_f16">; +defm V_CMPX_U_F16_t16 : VOPCX_Real_t16_gfx11<0x088, "v_cmpx_u_f16">; +defm V_CMPX_NGE_F16_t16 : VOPCX_Real_t16_gfx11<0x089, "v_cmpx_nge_f16">; +defm V_CMPX_NLG_F16_t16 : VOPCX_Real_t16_gfx11<0x08a, "v_cmpx_nlg_f16">; +defm V_CMPX_NGT_F16_t16 : VOPCX_Real_t16_gfx11<0x08b, "v_cmpx_ngt_f16">; +defm V_CMPX_NLE_F16_t16 : VOPCX_Real_t16_gfx11<0x08c, "v_cmpx_nle_f16">; +defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11<0x08d, "v_cmpx_neq_f16">; +defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11<0x08e, "v_cmpx_nlt_f16">; +defm V_CMPX_T_F16_t16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_t16", "v_cmpx_t_f16", "v_cmpx_tru_f16">; defm V_CMPX_F_F32 : VOPCX_Real_gfx11<0x090>; defm V_CMPX_LT_F32 : VOPCX_Real_gfx11<0x091>; defm V_CMPX_EQ_F32 : VOPCX_Real_gfx11<0x092>; @@ -1613,18 +1721,18 @@ defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11<0x0ad>; defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11<0x0ae>; defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">; -defm V_CMPX_LT_I16 : VOPCX_Real_gfx11<0x0b1>; -defm V_CMPX_EQ_I16 : VOPCX_Real_gfx11<0x0b2>; -defm V_CMPX_LE_I16 : VOPCX_Real_gfx11<0x0b3>; -defm V_CMPX_GT_I16 : VOPCX_Real_gfx11<0x0b4>; -defm V_CMPX_NE_I16 : VOPCX_Real_gfx11<0x0b5>; -defm V_CMPX_GE_I16 : VOPCX_Real_gfx11<0x0b6>; -defm V_CMPX_LT_U16 : VOPCX_Real_gfx11<0x0b9>; -defm V_CMPX_EQ_U16 : VOPCX_Real_gfx11<0x0ba>; -defm V_CMPX_LE_U16 : VOPCX_Real_gfx11<0x0bb>; -defm V_CMPX_GT_U16 : VOPCX_Real_gfx11<0x0bc>; -defm V_CMPX_NE_U16 : VOPCX_Real_gfx11<0x0bd>; -defm V_CMPX_GE_U16 : VOPCX_Real_gfx11<0x0be>; +defm V_CMPX_LT_I16_t16 : VOPCX_Real_t16_gfx11<0x0b1, "v_cmpx_lt_i16">; +defm V_CMPX_EQ_I16_t16 : VOPCX_Real_t16_gfx11<0x0b2, "v_cmpx_eq_i16">; +defm V_CMPX_LE_I16_t16 : VOPCX_Real_t16_gfx11<0x0b3, "v_cmpx_le_i16">; +defm V_CMPX_GT_I16_t16 : VOPCX_Real_t16_gfx11<0x0b4, "v_cmpx_gt_i16">; +defm V_CMPX_NE_I16_t16 : VOPCX_Real_t16_gfx11<0x0b5, "v_cmpx_ne_i16">; +defm 
V_CMPX_GE_I16_t16 : VOPCX_Real_t16_gfx11<0x0b6, "v_cmpx_ge_i16">; +defm V_CMPX_LT_U16_t16 : VOPCX_Real_t16_gfx11<0x0b9, "v_cmpx_lt_u16">; +defm V_CMPX_EQ_U16_t16 : VOPCX_Real_t16_gfx11<0x0ba, "v_cmpx_eq_u16">; +defm V_CMPX_LE_U16_t16 : VOPCX_Real_t16_gfx11<0x0bb, "v_cmpx_le_u16">; +defm V_CMPX_GT_U16_t16 : VOPCX_Real_t16_gfx11<0x0bc, "v_cmpx_gt_u16">; +defm V_CMPX_NE_U16_t16 : VOPCX_Real_t16_gfx11<0x0bd, "v_cmpx_ne_u16">; +defm V_CMPX_GE_U16_t16 : VOPCX_Real_t16_gfx11<0x0be, "v_cmpx_ge_u16">; defm V_CMPX_F_I32 : VOPCX_Real_gfx11<0x0c0>; defm V_CMPX_LT_I32 : VOPCX_Real_gfx11<0x0c1>; defm V_CMPX_EQ_I32 : VOPCX_Real_gfx11<0x0c2>; @@ -1658,7 +1766,7 @@ defm V_CMPX_GT_U64 : VOPCX_Real_gfx11<0x0dc>; defm V_CMPX_NE_U64 : VOPCX_Real_gfx11<0x0dd>; defm V_CMPX_GE_U64 : VOPCX_Real_gfx11<0x0de>; defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>; -defm V_CMPX_CLASS_F16 : VOPCX_Real_gfx11<0x0fd>; +defm V_CMPX_CLASS_F16_t16 : VOPCX_Real_t16_gfx11<0x0fd, "v_cmpx_class_f16">; defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11<0x0fe>; defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11<0x0ff>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index b65ca2d..b3c418a 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -24,6 +24,9 @@ class LetDummies { list SchedRW; list Uses; list Defs; + list OtherPredicates; + Predicate AssemblerPredicate; + string DecoderNamespace; } class VOP { @@ -61,6 +64,8 @@ class VOP_Pseudo (NAME); + bit IsTrue16 = P.IsTrue16; VOPProfile Pfl = P; string AsmOperands; @@ -1351,14 +1356,13 @@ let AssemblerPredicate = isGFX11Only, foreach _ = BoolToList.ret in def _e64_gfx11 : VOP3_Real, - VOP3OpSel_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + VOP3OpSel_gfx11; foreach _ = BoolToList.ret in def _e64_gfx11 : VOP3_Real, - VOP3e_gfx11, - MnemonicAlias, Requires<[isGFX11Plus]>; + VOP3e_gfx11; } + def _gfx11_VOP3_alias : MnemonicAlias, Requires<[isGFX11Plus]>, LetDummies; } // for 
READLANE/WRITELANE multiclass VOP3_Real_No_Suffix_gfx11 op, string opName = NAME> { @@ -1461,6 +1465,10 @@ multiclass VOP3Only_Realtriple_with_name_gfx11 op, string opName, string asmName> : VOP3_Realtriple_with_name_gfx11; +multiclass VOP3Only_Realtriple_t16_gfx11 op, string asmName, + string opName = NAME> + : VOP3Only_Realtriple_with_name_gfx11; + multiclass VOP3be_Realtriple_gfx11< bits<10> op, bit isSingle = 0, string opName = NAME, string asmName = !cast(opName#"_e64").Mnemonic> : @@ -1503,3 +1511,12 @@ class VOPC64Table : GenericTable { def VOPC64DPPTable : VOPC64Table<"DPP">; def VOPC64DPP8Table : VOPC64Table<"DPP8">; + +def VOPTrue16Table : GenericTable { + let FilterClass = "VOP_Pseudo"; + let CppTypeName = "VOPTrue16Info"; + let Fields = ["Opcode", "IsTrue16"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getTrue16OpcodeHelper"; +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index e96968c..4ff4c91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -108,8 +108,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -201,8 +201,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], 
[[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -247,8 +247,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -446,8 +446,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir index eef2372..b72c4d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -39,7 +39,7 
@@ body: | ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -82,7 +82,7 @@ body: | ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir index 5482905..ac60a5e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir @@ -73,7 +73,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -110,7 +110,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: 
%4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -147,7 +147,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -184,7 +184,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -221,7 +221,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -257,7 +257,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit 
$mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -294,7 +294,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -331,7 +331,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -368,7 +368,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -405,7 +405,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, 
[[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -442,7 +442,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -479,7 +479,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -516,7 +516,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -553,7 +553,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: 
%4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir index ffc3b3b..ec50b56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -25,7 +25,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir index dff5a3a..27f275b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -25,7 +25,7 @@ body: | ; GFX11-NEXT: {{ 
$}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir index d35beb3..1768795 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -25,7 +25,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit 
$mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir index e4560ce..34b50f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -25,7 +25,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %4 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -56,7 +56,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %5 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir index c4ac25b..7d2c3f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -117,21 +117,21 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 
[[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptosi_s16_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec ; VI-NEXT: $vgpr0 = COPY %2 ; GFX11-LABEL: name: fptosi_s16_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %2 %0:vgpr(s32) = COPY $vgpr0 @@ -154,21 +154,21 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptosi_s16_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: 
%2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec ; VI-NEXT: $vgpr0 = COPY %2 ; GFX11-LABEL: name: fptosi_s16_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %2 %0:sgpr(s32) = COPY $sgpr0 @@ -193,7 +193,7 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0 = COPY %3 ; VI-LABEL: name: fptosi_s16_to_s32_fneg_vv @@ -202,7 +202,7 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; VI-NEXT: $vgpr0 = COPY %3 ; GFX11-LABEL: name: fptosi_s16_to_s32_fneg_vv @@ -211,7 +211,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 
32768 ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %3 %0:vgpr(s32) = COPY $vgpr0 @@ -235,21 +235,21 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptosi_s16_to_s1_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit %2 ; GFX11-LABEL: name: fptosi_s16_to_s1_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 @@ -273,21 +273,21 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptosi_s16_to_s1_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit %2 ; GFX11-LABEL: name: fptosi_s16_to_s1_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:sgpr(s32) = COPY $sgpr0 @@ -313,7 +313,7 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit %3 ; VI-LABEL: name: fptosi_s16_to_s1_fneg_vv @@ -322,7 +322,7 @@ body: | ; VI-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit %3 ; GFX11-LABEL: name: fptosi_s16_to_s1_fneg_vv @@ -331,7 +331,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %3 %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir index 580f49a..09a8862 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -73,21 +73,21 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: 
fptoui_s16_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec ; VI-NEXT: $vgpr0 = COPY %2 ; GFX11-LABEL: name: fptoui_s16_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %2 %0:vgpr(s32) = COPY $vgpr0 @@ -110,21 +110,21 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptoui_s16_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec ; VI-NEXT: $vgpr0 = COPY %2 ; GFX11-LABEL: name: fptoui_s16_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept 
V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %2 %0:sgpr(s32) = COPY $sgpr0 @@ -149,7 +149,7 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr0 = COPY %3 ; VI-LABEL: name: fptoui_s16_to_s32_fneg_vv @@ -158,7 +158,7 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; VI-NEXT: $vgpr0 = COPY %3 ; GFX11-LABEL: name: fptoui_s16_to_s32_fneg_vv @@ -167,7 +167,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, 
[[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %3 %0:vgpr(s32) = COPY $vgpr0 @@ -191,21 +191,21 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptoui_s16_to_s1_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit %2 ; GFX11-LABEL: name: fptoui_s16_to_s1_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 @@ -229,21 +229,21 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, 
implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptoui_s16_to_s1_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit %2 ; GFX11-LABEL: name: fptoui_s16_to_s1_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %2 %0:sgpr(s32) = COPY $sgpr0 @@ -269,7 +269,7 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit %3 ; VI-LABEL: name: fptoui_s16_to_s1_fneg_vv @@ -278,7 +278,7 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, 
implicit $exec + ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit %3 ; GFX11-LABEL: name: fptoui_s16_to_s1_fneg_vv @@ -287,7 +287,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit %3 %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir index 3045043..3c2c375 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir @@ -32,8 +32,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 @@ -71,8 +71,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; 
GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -110,8 +110,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -149,8 +149,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -188,8 +188,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] + ; 
GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -227,8 +227,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] + ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -266,8 +266,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -305,8 +305,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] + ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]] 
%0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index 5937c9d..ddb80f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -106,8 +106,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -199,8 +199,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -245,8 +245,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 
= V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -444,8 +444,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir index 4749e70..e0ce71e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -90,9 +90,9 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 
@@ -143,9 +143,9 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -197,10 +197,10 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[V_MIN_I16_t16_e64_]], [[COPY2]], implicit $exec ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir index 5048368..7f4b778 
100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -90,9 +90,9 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -143,9 +143,9 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -197,10 +197,10 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 
[[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[V_MIN_U16_t16_e64_]], [[COPY2]], implicit $exec ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index 989ac52..6e8a379c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -106,8 +106,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -199,8 +199,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; 
GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -245,8 +245,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -444,8 +444,8 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir index 1aa8d5d..043d93a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -74,21 +74,21 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: 
[[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE64-NEXT: $vgpr0 = COPY %1 ; WAVE32-LABEL: name: sitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE32-NEXT: $vgpr0 = COPY %1 ; GFX11-LABEL: name: sitofp_s32_to_s16_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %1 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_SITOFP %0 @@ -111,21 +111,21 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE64-NEXT: $vgpr0 = COPY %1 ; WAVE32-LABEL: name: 
sitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE32-NEXT: $vgpr0 = COPY %1 ; GFX11-LABEL: name: sitofp_s32_to_s16_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %1 %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_SITOFP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir index 4f381c8..a9419b9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -84,21 +84,21 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE64-NEXT: $vgpr0 = COPY %1 ; WAVE32-LABEL: name: uitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE32-NEXT: $vgpr0 = COPY %1 ; GFX11-LABEL: name: uitofp_s32_to_s16_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %1 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_UITOFP %0 @@ -121,21 +121,21 @@ body: | ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE64-NEXT: $vgpr0 = COPY %1 ; WAVE32-LABEL: name: uitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; WAVE32-NEXT: $vgpr0 = COPY %1 ; 
GFX11-LABEL: name: uitofp_s32_to_s16_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec + ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr0 = COPY %1 %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_UITOFP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 2734984..c26fba5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -51,7 +51,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1966091 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1966091 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] @@ -79,7 +79,7 @@ entry: define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0 ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -91,7 +91,7 @@ entry: define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -104,7 +104,7 @@ entry: define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 1835018 /* regdef:VGPR_32 */, def %1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0, 1966090 /* regdef:VGPR_32 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] @@ -121,7 +121,7 @@ define float @test_multiple_register_outputs_same() #0 { define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2949130 /* regdef:VReg_64 */, def %1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0, 3211274 /* regdef:VReg_64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) 
@@ -153,7 +153,7 @@ define amdgpu_kernel void @test_input_vgpr_imm() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1835017 /* reguse:VGPR_32 */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:VGPR_32 */, [[COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) ret void @@ -164,7 +164,7 @@ define amdgpu_kernel void @test_input_sgpr_imm() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:SReg_32 */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2097161 /* reguse:SReg_32 */, [[COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) ret void @@ -188,7 +188,7 @@ define float @test_input_vgpr(i32 %src) nounwind { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835017 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 1966089 /* reguse:VGPR_32 */, [[COPY1]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -203,7 +203,7 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* 
regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -220,7 +220,7 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -232,13 +232,13 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %2 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %4, 
1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %4, 2097161 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -261,7 +261,7 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 1966090 /* regdef:VGPR_32 */, def %4, 1966090 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 @@ -282,10 +282,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 
2097162 /* regdef:SReg_32 */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll index d2240a0..3179f8b 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll @@ -536,10 +536,10 @@ define internal void @use256vgprs() { ; GFX10WGP-WAVE64: NumVgprs: 256 ; GFX10CU-WAVE32: NumVgprs: 256 ; GFX10CU-WAVE64: NumVgprs: 256 -; GFX11WGP-WAVE32: NumVgprs: 128 -; GFX11WGP-WAVE64: NumVgprs: 128 -; GFX11CU-WAVE32: NumVgprs: 128 -; GFX11CU-WAVE64: NumVgprs: 128 +; GFX11WGP-WAVE32: NumVgprs: 256 +; GFX11WGP-WAVE64: NumVgprs: 256 +; GFX11CU-WAVE32: NumVgprs: 256 +; GFX11CU-WAVE64: NumVgprs: 256 define amdgpu_kernel void @f256() #256 { call void @use256vgprs() ret void @@ -555,8 +555,8 @@ attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" } ; GFX10WGP-WAVE64: NumVgprs: 256 ; GFX10CU-WAVE32: NumVgprs: 128 ; GFX10CU-WAVE64: NumVgprs: 128 -; GFX11WGP-WAVE32: NumVgprs: 128 -; GFX11WGP-WAVE64: NumVgprs: 128 +; GFX11WGP-WAVE32: NumVgprs: 256 +; GFX11WGP-WAVE64: NumVgprs: 256 ; GFX11CU-WAVE32: NumVgprs: 128 ; GFX11CU-WAVE64: NumVgprs: 128 define amdgpu_kernel void @f512() #512 { diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir 
b/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir index a25f22f..327f11d 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir @@ -20,7 +20,7 @@ body: | ; CHECK-LABEL: name: foo1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32 @@ -41,7 +41,7 @@ body: | ; CHECK-LABEL: name: foo2 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32 @@ -62,7 +62,7 @@ body: | ; CHECK-LABEL: name: foo3 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* 
regdef:VRegOrLds_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32 @@ -83,7 +83,7 @@ body: | ; CHECK-LABEL: name: foo4 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0 ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK-NEXT: S_ENDPGM 0 INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32 diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir new file mode 100644 index 0000000..367c1fb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir @@ -0,0 +1,84 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16 +# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit $exec + %3 = V_FMAC_F16_e32 killed %0.sub0, 
%2, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_imm_reg_f16 +# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_imm_reg_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = V_MOV_B32_e32 1078523331, implicit $exec + %3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmaak_f16 +# GFX10: %1:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec +--- +name: test_fmaak_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } +body: | + bb.0: + + %0 = IMPLICIT_DEF + %1 = V_MOV_B32_e32 1078523331, implicit $exec + %2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec +... + +# GFX10-LABEL: name: test_fmaak_inline_literal_f16 +# GFX10: %1:vgpr_32 = IMPLICIT_DEF +# GFX10-NOT: V_MOV_B32 +# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec + +--- +name: test_fmaak_inline_literal_f16 +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } +body: | + bb.0: + liveins: $vgpr0 + + %0:vgpr_32 = COPY killed $vgpr0 + + %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec + %2:vgpr_32 = V_FMAC_F16_e32 16384, killed %0, %1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir new file mode 100644 index 0000000..df75667 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir @@ -0,0 +1,101 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX11 %s + +--- +name: test_fmamk_reg_imm_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + ; GFX11-LABEL: name: test_fmamk_reg_imm_f16 + ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = COPY %0.sub0 + %3 = V_MOV_B32_e32 1078523331, implicit $exec + %4 = V_FMAC_F16_t16_e64 0, killed %2, 0, %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec + +... 
+ +--- +name: test_fmamk_imm_reg_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + ; GFX11-LABEL: name: test_fmamk_imm_reg_f16 + ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + %0 = IMPLICIT_DEF + %1 = COPY %0.sub1 + %2 = COPY %0.sub0 + %3 = V_MOV_B32_e32 1078523331, implicit $exec + %4 = V_FMAC_F16_t16_e64 0, %2, 0, killed %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec + +... + +--- +name: test_fmaak_f16 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + ; GFX11-LABEL: name: test_fmaak_f16 + ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + %0 = IMPLICIT_DEF + %1 = COPY %0.sub0 + %2 = COPY %0.sub1 + %3 = V_MOV_B32_e32 1078523331, implicit $exec + %4 = V_FMAC_F16_t16_e64 0, killed %1, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: test_fmaak_inline_literal_f16 +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } +body: | + bb.0: + liveins: $vgpr0 + + ; GFX11-LABEL: name: test_fmaak_inline_literal_f16 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY killed $vgpr0 + + %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec + %2:vgpr_32 = V_FMAC_F16_t16_e64 0, 16384, 0, killed %0, 0, %1, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll index d3e18db..23991e8 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,15 +8,15 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4 ; 
GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() { define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5832713 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() { define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: 
a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:AReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:AReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir new file mode 100644 index 0000000..fe5a84b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir @@ -0,0 +1,53 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11 + +--- +name: ftrunc_upward + +body: | 
+ bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: ftrunc_upward + ; CHECK: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec + ; CHECK-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode + ; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: ftrunc_upward + ; GFX11: liveins: $sgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec + ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode + ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec + $vgpr1 = FPTRUNC_UPWARD_PSEUDO $vgpr0, implicit $mode, implicit $exec + S_ENDPGM 0 +... +--- +name: ftrunc_downward + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: ftrunc_downward + ; CHECK: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec + ; CHECK-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode + ; CHECK-NEXT: $vgpr0 = V_CVT_F16_F32_e32 $vgpr1, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: ftrunc_downward + ; GFX11: liveins: $sgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec + ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode + ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec + $vgpr0 = FPTRUNC_DOWNWARD_PSEUDO $vgpr1, implicit $mode, implicit $exec + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll index 318ea63..21c0a0f 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -10,10 +10,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX908: bb.0 (%ir-block.0): ; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX908-NEXT: {{ $}} - ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef %5:agpr_32 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %26 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %26 ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def %23 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def %23 ; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]] ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) @@ -34,10 +34,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX908-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; 
PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef renamable $agpr0 - ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 + ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0 + ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 + ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 ; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec @@ -57,10 +57,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX90A: bb.0 (%ir-block.0): ; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX90A-NEXT: {{ $}} - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef %5:agpr_32 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %25 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 
/* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %25 ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def %23 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def %23 ; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4) @@ -79,10 +79,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef renamable $agpr0 - ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 + ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0 + ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def renamable 
$vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1 + ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll index 1fdd1b4..8eae2dc 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll @@ -215,7 +215,7 @@ define i32 @zext_fadd_f16(half %x, half %y) { ; GFX9: v_fma_f16 [[FMA:v[0-9]+]], v0, v1, v2 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]] -; GFX10Plus: v_fmac_f16_e32 [[FMA:v[0-9]+]], v0, v1 +; GFX10Plus: v_fmac_f16{{_e64|_e32}} [[FMA:v[0-9]+]], v0, v1 ; GFX10Plus-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]] define i32 @zext_fma_f16(half %x, half %y, half %z) { %fma = call half @llvm.fma.f16(half %x, half %y, half %z) diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir similarity index 65% rename from llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir rename to llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir index 51c3dcf..0a30d60 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir +++ 
b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX10 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 --- name: mad_cvv_f32 @@ -10,6 +11,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_cvv_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -25,6 +31,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_vcv_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -40,6 +51,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_vvc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit 
$exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -55,6 +71,11 @@ body: | ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_vsc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -70,6 +91,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_cvv_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -85,6 +111,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_vcv_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -100,6 +131,11 @@ body: | ; GFX10-NEXT: $vgpr1 = 
IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_vvc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -115,6 +151,11 @@ body: | ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_vsc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec @@ -130,6 +171,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_cvv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -145,6 +191,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_vcv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; 
GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -160,6 +211,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_vvc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -175,6 +231,11 @@ body: | ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: mad_vsc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -190,6 +251,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_cvv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_t16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -205,6 +271,11 
@@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_vcv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_t16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec @@ -220,6 +291,11 @@ body: | ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_vvc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_t16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -235,6 +311,11 @@ body: | ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; GFX11-LABEL: name: fma_vsc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_t16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll index 2a35098..fd60228 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll @@ -12,14 +12,14 
@@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 { ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %22.sub0 + ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def undef %22.sub0 ; GCN-NEXT: undef %24.sub0:av_64 = COPY %22.sub0 ; GCN-NEXT: SI_SPILL_AV64_SAVE %24, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: undef %23.sub0:vreg_64 = COPY [[SI_SPILL_AV64_RESTORE]].sub0 - ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2949129 /* reguse:VReg_64 */, %23 + ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3211273 /* reguse:VReg_64 */, %23 ; GCN-NEXT: S_ENDPGM 0 %v0 = call i32 asm sideeffect "; def $0", "=v"() %tmp = insertelement <2 x i32> undef, i32 %v0, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/true16-ra-f128-fail.mir b/llvm/test/CodeGen/AMDGPU/true16-ra-f128-fail.mir new file mode 100644 index 0000000..7cd4f39 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-ra-f128-fail.mir @@ -0,0 +1,34 @@ +# RUN: not llc -march=amdgcn -mcpu=gfx1100 -debug-only=regalloc -start-before=greedy,0 -stop-after=virtregrewriter,1 
-verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck --check-prefixes=CHECK %s +# REQUIRES: asserts + +--- | + define amdgpu_ps void @e32() { + ret void + } +... + + +--- +name: e32 +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127 + + ; CHECK: error: ran out of registers during register allocation + ; CHECK: [[REG1:vgpr[0-9]+]] = V_ADD_F16_t16_e32 + ; CHECK: SI_SPILL_V32_SAVE $[[REG1]] + %0:vgpr_32_lo128 = V_ADD_F16_t16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode + S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 + 
S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 + S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95 + S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111 + S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 + S_ENDPGM 0, implicit %0 +... + diff --git a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir new file mode 100644 index 0000000..6b98c48 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir @@ -0,0 +1,55 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s + +--- | + define amdgpu_ps void @e32() #0 { + ret void + } + + define amdgpu_ps void @e64() #0 { + ret void + } + +... 
+ + +--- +name: e32 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127 + + ; GCN-LABEL: name: e32 + ; GCN: renamable $vgpr128 = V_ADD_F16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode + %0:vgpr_32 = V_ADD_F16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode + S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 + S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 + S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95 + 
S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111 + S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 + S_ENDPGM 0, implicit %0 +... + +--- +name: e64 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127 + + ; GCN-LABEL: name: e64 + ; GCN: renamable $vgpr128 = V_ADD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec, implicit $mode + %0:vgpr_32 = V_ADD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec, implicit $mode + S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + S_NOP 0, implicit 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 + S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 + S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95 + S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111 + S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 + S_ENDPGM 0, implicit %0 +... diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir index 8d665d6..7757c75 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir @@ -1,5 +1,5 @@ -# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GCN %s # GCN-LABEL: name: test_fmamk_reg_imm_f32 # GCN: %2:vgpr_32 = IMPLICIT_DEF @@ -62,65 +62,6 @@ body: | ... 
-# GCN-LABEL: name: test_fmamk_reg_imm_f16 -# GCN: %2:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec ---- -name: test_fmamk_reg_imm_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } - - { id: 3, class: vgpr_32 } -body: | - bb.0: - - %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 - %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec - -... - -# GCN-LABEL: name: test_fmamk_imm_reg_f16 -# GCN: %2:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec ---- -name: test_fmamk_imm_reg_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } - - { id: 3, class: vgpr_32 } -body: | - bb.0: - - %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 - %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec - -... - -# GCN-LABEL: name: test_fmaak_f16 -# GCN: %1:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec ---- -name: test_fmaak_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } -body: | - bb.0: - - %0 = IMPLICIT_DEF - %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec -... # GCN-LABEL: name: test_fmaak_sgpr_src0_f32 # GCN: %1:vgpr_32 = IMPLICIT_DEF @@ -207,27 +148,6 @@ body: | ... 
-# GCN-LABEL: name: test_fmaak_inline_literal_f16 -# GCN: %1:vgpr_32 = IMPLICIT_DEF -# GCN-NOT: V_MOV_B32 -# GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec - ---- -name: test_fmaak_inline_literal_f16 -tracksRegLiveness: true -liveins: - - { reg: '$vgpr0', virtual-reg: '%0' } -body: | - bb.0: - liveins: $vgpr0 - - %0:vgpr_32 = COPY killed $vgpr0 - - %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec - %2:vgpr_32 = V_FMAC_F16_e32 16384, killed %0, %1, implicit $mode, implicit $exec - S_ENDPGM 0 - -... # GCN-LABEL: name: test_fmamk_reg_imm_f32_2_folds # GCN: %2:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir index 450951e..b8c9359 100644 --- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir @@ -18,15 +18,17 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: V_CMPX_EQ_I16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN-NEXT: V_CMP_CLASS_F16_e32_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[V_CMP_GE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec ; 
GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec ; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec - ; GCN-NEXT: V_CMP_NGE_F16_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN-NEXT: [[V_CMP_NGE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F16_e64_dpp]], 10101, implicit-def $scc + ; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc ; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 @@ -38,33 +40,37 @@ body: | ; unsafe to combine cmpx %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMPX_EQ_I16_e32 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec + V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMP_CLASS_F16_e32 %6, %0, implicit-def $vcc, implicit $mode, 
implicit $exec + %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec - %7:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %8:sgpr_32 = V_CMP_GE_F16_e64 1, %7, 0, %0, 1, implicit $mode, implicit $exec + %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec ; unsafe to combine cmpx - %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMPX_GT_U32_nosdst_e64 %9, %0, implicit-def $exec, implicit $mode, implicit $exec + %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec ; shrink %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %14:sgpr_32 = V_CMP_NGE_F16_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec + %14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec - ; do not shrink, sdst used + ; do not shrink True16 instructions %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %16:sgpr_32 = V_CMP_NGE_F16_e64 0, %15, 0, %0, 0, implicit $mode, implicit $exec - %17:sgpr_32 = S_AND_B32 %16, 10101, implicit-def $scc + %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec + + ; do not shrink, sdst used + %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + %18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec + %19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc ; commute - %18:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec + %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec ... 
--- @@ -83,9 +89,9 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec - ; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec - ; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 %2:vgpr_32 = IMPLICIT_DEF @@ -94,9 +100,9 @@ body: | ; Do not combine VOPC when row_mask or bank_mask is not 0xf ; All cases are covered by generic rules for creating DPP instructions %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec - V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec + %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec - %6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec + %6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec ... 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s new file mode 100644 index 0000000..b62520d --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s @@ -0,0 +1,498 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_ceil_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v128, 0xaf123456 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_f32_e32 v255, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_i16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f16_u16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_f32_f16_e32 v5, v199 
+// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_i32_i16_e32 v5, v199 +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_i16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_i16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_norm_u16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_cvt_u32_u16_e32 v5, v199 +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_exp_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_exp_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_floor_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU 
or mode + +v_fract_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_fract_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_exp_i16_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_frexp_mant_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_log_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_not_b16_e32 v128, 0xfe0b +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_rcp_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_rcp_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_rndne_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_rsq_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + 
+v_rsq_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_sin_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_sqrt_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v128, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v255, v1 +// GFX11: error: operands are not valid for this GPU or mode + +v_trunc_f16_e32 v5, v199 +// GFX11: error: operands are not valid for this GPU or mode + +v_ceil_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_ceil_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128, 0xaf123456 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + 
+v_cvt_f32_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i32_i16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid 
operand for instruction + +v_frexp_mant_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sin_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sin_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v5, v199 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_ceil_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_ceil_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cos_f16_e32 v5, v199 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128, 0xaf123456 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_i16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f16_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_i32_i16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_norm_u16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cvt_u32_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v255, v1 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_exp_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_floor_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_fract_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_exp_i16_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_frexp_mant_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_log_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_not_b16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rcp_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rndne_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_rsq_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction 
+ +v_sin_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sin_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_sqrt_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_trunc_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s new file mode 100644 index 0000000..0fefb9b --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s @@ -0,0 +1,1473 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s + +v_ceil_f16 v128, 0xfe0b +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, -1 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, 0.5 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, exec_hi +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, exec_lo +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, m0 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, null +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, s1 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, s105 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, src_scc +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, ttmp15 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v1 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v127 +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, vcc_hi +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, vcc_lo +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v5, v199 +// GFX11: v_ceil_f16_e64 
+ +v_cos_f16 v128, 0xfe0b +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, -1 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, 0.5 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, exec_hi +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, exec_lo +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, m0 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, null +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, s1 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, s105 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, src_scc +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, ttmp15 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v1 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v127 +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, vcc_hi +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, vcc_lo +// GFX11: v_cos_f16_e64 + +v_cos_f16 v5, v199 +// GFX11: v_cos_f16_e64 + +v_cvt_f16_f32 v128, 0xaf123456 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, -1 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, 0.5 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, exec_hi +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, exec_lo +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, m0 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, null +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, s1 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, s105 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, src_scc +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, ttmp15 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v1 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v255 +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, vcc_hi +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, vcc_lo +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_i16 v128, 0xfe0b +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, -1 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, 0.5 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, exec_hi +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, exec_lo +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, m0 +// GFX11: 
v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, null +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, s1 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, s105 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, src_scc +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, ttmp15 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v1 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v127 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, vcc_hi +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, vcc_lo +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v5, v199 +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_u16 v128, 0xfe0b +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, -1 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, 0.5 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, exec_hi +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, exec_lo +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, m0 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, null +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, s1 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, s105 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, src_scc +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, ttmp15 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v1 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v127 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, vcc_hi +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, vcc_lo +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v5, v199 +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f32_f16 v5, v199 +// GFX11: v_cvt_f32_f16_e64 + +v_cvt_i16_f16 v128, 0xfe0b +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, -1 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, 0.5 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, exec_hi +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, exec_lo +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, m0 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, null +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, s1 +// 
GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, s105 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, src_scc +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, ttmp15 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, v1 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, v127 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, vcc_hi +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, vcc_lo +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v5, v199 +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i32_i16 v5, v199 +// GFX11: v_cvt_i32_i16_e64 + +v_cvt_norm_i16_f16 v128, 0xfe0b +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, -1 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, 0.5 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, exec_hi +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, exec_lo +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, m0 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, null +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, s1 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, s105 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, src_scc +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, ttmp15 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v1 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v127 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, vcc_hi +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, vcc_lo +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v5, v199 +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_u16_f16 v128, 0xfe0b +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, -1 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, 0.5 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, exec_hi +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, exec_lo +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, m0 +// 
GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, null +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, s1 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, s105 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, src_scc +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, ttmp15 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v1 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v127 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, vcc_hi +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, vcc_lo +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v5, v199 +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_u16_f16 v128, 0xfe0b +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, -1 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, 0.5 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, exec_hi +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, exec_lo +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, m0 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, null +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, s1 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, s105 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, src_scc +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, ttmp15 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v1 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v127 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, vcc_hi +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, vcc_lo +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v5, v199 +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u32_u16 v5, v199 +// GFX11: v_cvt_u32_u16_e64 + +v_exp_f16 v128, 0xfe0b +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, -1 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, 0.5 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, exec_hi +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, exec_lo +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, m0 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, null 
+// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, s1 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, s105 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, src_scc +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, ttmp15 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v1 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v127 +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, vcc_hi +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, vcc_lo +// GFX11: v_exp_f16_e64 + +v_exp_f16 v5, v199 +// GFX11: v_exp_f16_e64 + +v_floor_f16 v128, 0xfe0b +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, -1 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, 0.5 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, exec_hi +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, exec_lo +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, m0 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, null +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, s1 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, s105 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, src_scc +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, ttmp15 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v1 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v127 +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, vcc_hi +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, vcc_lo +// GFX11: v_floor_f16_e64 + +v_floor_f16 v5, v199 +// GFX11: v_floor_f16_e64 + +v_fract_f16 v128, 0xfe0b +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, -1 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, 0.5 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, exec_hi +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, exec_lo +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, m0 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, null +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, s1 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, s105 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, src_scc +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, ttmp15 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v1 +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v127 +// GFX11: 
v_fract_f16_e64 + +v_fract_f16 v255, vcc_hi +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, vcc_lo +// GFX11: v_fract_f16_e64 + +v_fract_f16 v5, v199 +// GFX11: v_fract_f16_e64 + +v_frexp_exp_i16_f16 v128, 0xfe0b +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, -1 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, 0.5 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, exec_hi +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, exec_lo +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, m0 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, null +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, s1 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, s105 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, src_scc +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, ttmp15 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v1 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v127 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, vcc_hi +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, vcc_lo +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v5, v199 +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_mant_f16 v128, 0xfe0b +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, -1 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, 0.5 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, exec_hi +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, exec_lo +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, m0 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, null +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, s1 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, s105 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, src_scc +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, ttmp15 +// GFX11: v_frexp_mant_f16_e64 + 
+v_frexp_mant_f16 v255, v1 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, v127 +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, vcc_hi +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, vcc_lo +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v5, v199 +// GFX11: v_frexp_mant_f16_e64 + +v_log_f16 v128, 0xfe0b +// GFX11: v_log_f16_e64 + +v_log_f16 v255, -1 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, 0.5 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, exec_hi +// GFX11: v_log_f16_e64 + +v_log_f16 v255, exec_lo +// GFX11: v_log_f16_e64 + +v_log_f16 v255, m0 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, null +// GFX11: v_log_f16_e64 + +v_log_f16 v255, s1 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, s105 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, src_scc +// GFX11: v_log_f16_e64 + +v_log_f16 v255, ttmp15 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v1 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v127 +// GFX11: v_log_f16_e64 + +v_log_f16 v255, vcc_hi +// GFX11: v_log_f16_e64 + +v_log_f16 v255, vcc_lo +// GFX11: v_log_f16_e64 + +v_log_f16 v5, v199 +// GFX11: v_log_f16_e64 + +v_not_b16 v128, 0xfe0b +// GFX11: v_not_b16_e64 + +v_not_b16 v255, -1 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, 0.5 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, exec_hi +// GFX11: v_not_b16_e64 + +v_not_b16 v255, exec_lo +// GFX11: v_not_b16_e64 + +v_not_b16 v255, m0 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, null +// GFX11: v_not_b16_e64 + +v_not_b16 v255, s1 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, s105 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, src_scc +// GFX11: v_not_b16_e64 + +v_not_b16 v255, ttmp15 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v1 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v127 +// GFX11: v_not_b16_e64 + +v_not_b16 v255, vcc_hi +// GFX11: v_not_b16_e64 + +v_not_b16 v255, vcc_lo +// GFX11: v_not_b16_e64 + +v_not_b16 v5, v199 +// GFX11: v_not_b16_e64 + +v_rcp_f16 v128, 0xfe0b +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, -1 +// GFX11: v_rcp_f16_e64 + 
+v_rcp_f16 v255, 0.5 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, exec_hi +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, exec_lo +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, m0 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, null +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, s1 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, s105 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, src_scc +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, ttmp15 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v1 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v127 +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, vcc_hi +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, vcc_lo +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v5, v199 +// GFX11: v_rcp_f16_e64 + +v_rndne_f16 v128, 0xfe0b +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, -1 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, 0.5 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, exec_hi +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, exec_lo +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, m0 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, null +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, s1 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, s105 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, src_scc +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, ttmp15 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v1 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v127 +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, vcc_hi +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, vcc_lo +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v5, v199 +// GFX11: v_rndne_f16_e64 + +v_rsq_f16 v128, 0xfe0b +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, -1 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, 0.5 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, exec_hi +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, exec_lo +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, m0 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, null +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, s1 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, s105 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 
v255, src_scc +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, ttmp15 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v1 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v127 +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, vcc_hi +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, vcc_lo +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v5, v199 +// GFX11: v_rsq_f16_e64 + +v_sin_f16 v128, 0xfe0b +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, -1 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, 0.5 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, exec_hi +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, exec_lo +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, m0 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, null +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, s1 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, s105 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, src_scc +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, ttmp15 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v1 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v127 +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, vcc_hi +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, vcc_lo +// GFX11: v_sin_f16_e64 + +v_sin_f16 v5, v199 +// GFX11: v_sin_f16_e64 + +v_sqrt_f16 v128, 0xfe0b +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, -1 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, 0.5 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, exec_hi +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, exec_lo +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, m0 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, null +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, s1 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, s105 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, src_scc +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, ttmp15 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v1 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v127 +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, vcc_hi +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, vcc_lo +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v5, v199 +// GFX11: v_sqrt_f16_e64 + +v_trunc_f16 v128, 0xfe0b +// GFX11: 
v_trunc_f16_e64 + +v_trunc_f16 v255, -1 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, 0.5 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, exec_hi +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, exec_lo +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, m0 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, null +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, s1 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, s105 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, src_scc +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, ttmp15 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v1 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v127 +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, vcc_hi +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, vcc_lo +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v5, v199 +// GFX11: v_trunc_f16_e64 + +v_ceil_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_cos_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cvt_f16_f32 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v255 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_i16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_u16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f32_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_f32_f16_e64 + +v_cvt_i16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 
v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i32_i16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_i32_i16_e64 + +v_cvt_norm_i16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_u16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_u16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u32_u16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_cvt_u32_u16_e64 + +v_exp_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_floor_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_fract_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_frexp_exp_i16_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_mant_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: 
v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_log_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_log_f16_e64 + +v_not_b16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_not_b16_e64 + +v_rcp_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rndne_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rsq_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_sin_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sqrt_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_trunc_f16 v255, v1 quad_perm:[3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v127 quad_perm:[3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v5, v199 quad_perm:[3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_ceil_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_ceil_f16 
v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ceil_f16_e64 + +v_cos_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cos_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cos_f16_e64 + +v_cvt_f16_f32 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_f32 v255, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_f32_e64 + +v_cvt_f16_i16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_i16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_i16_e64 + +v_cvt_f16_u16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f16_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_u16_e64 + +v_cvt_f32_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f32_f16_e64 + +v_cvt_i16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i16_f16_e64 + +v_cvt_i32_i16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_i32_i16_e64 + +v_cvt_norm_i16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_i16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_i16_f16_e64 + +v_cvt_norm_u16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_norm_u16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_norm_u16_f16_e64 + +v_cvt_u16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u16_f16 
v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u16_f16_e64 + +v_cvt_u32_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_u32_u16_e64 + +v_exp_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_exp_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_exp_f16_e64 + +v_floor_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_floor_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_floor_f16_e64 + +v_fract_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_fract_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fract_f16_e64 + +v_frexp_exp_i16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_exp_i16_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_exp_i16_f16_e64 + +v_frexp_mant_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_frexp_mant_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_frexp_mant_f16_e64 + +v_log_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_log_f16_e64 + +v_log_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_log_f16_e64 + +v_not_b16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_not_b16_e64 + +v_not_b16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_not_b16_e64 + +v_rcp_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rcp_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rcp_f16_e64 + +v_rndne_f16 v255, v1 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rndne_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rndne_f16_e64 + +v_rsq_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_rsq_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_rsq_f16_e64 + +v_sin_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sin_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sin_f16_e64 + +v_sqrt_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_sqrt_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sqrt_f16_e64 + +v_trunc_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_trunc_f16_e64 + +v_trunc_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_trunc_f16_e64 + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s new file mode 100644 index 0000000..c2e6974 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s @@ -0,0 +1,228 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_add_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmaak_f16_e32 v255, v1, v2, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 +// GFX11: error: 
operands are not valid for this GPU or mode + +v_ldexp_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v255, v1, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmaak_f16_e32 v5, v255, v2, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v5, v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmaak_f16_e32 v5, v1, v255, 0xfe0b +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_fmamk_f16_e32 v5, v1, 0xfe0b, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_e32 v5, v1, v255 +// GFX11: error: operands are 
not valid for this GPU or mode + +v_min_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v5, v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, 
v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands 
are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + +v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: operands are not valid for this GPU or mode + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s new file mode 100644 index 0000000..c5f81d5 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s @@ -0,0 +1,192 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 
-show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s + +v_add_f16 v255, v1, v2 +// GFX11: v_add_f16_e64 + +v_fmac_f16 v255, v1, v2 +// GFX11: v_fmac_f16_e64 + +v_ldexp_f16 v255, v1, v2 +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v255, v1, v2 +// GFX11: v_max_f16_e64 + +v_min_f16 v255, v1, v2 +// GFX11: v_min_f16_e64 + +v_mul_f16 v255, v1, v2 +// GFX11: v_mul_f16_e64 + +v_sub_f16 v255, v1, v2 +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v255, v1, v2 +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v255, v2 +// GFX11: v_add_f16_e64 + +v_fmac_f16 v5, v255, v2 +// GFX11: v_fmac_f16_e64 + +v_ldexp_f16 v5, v255, v2 +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v5, v255, v2 +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v255, v2 +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v255, v2 +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v255, v2 +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v255, v2 +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v1, v255 +// GFX11: v_add_f16_e64 + +v_fmac_f16 v5, v1, v255 +// GFX11: v_fmac_f16_e64 + +v_max_f16 v5, v1, v255 +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v1, v255 +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v1, v255 +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v1, v255 +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v1, v255 +// GFX11: v_subrev_f16_e64 + +v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v255, v2 
quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64 + +v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64 + +v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64 + +v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64 + +v_add_f16 v5, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64 + +v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64 + +v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64 + +v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64 + +v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64 + +v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64 + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s new file mode 100644 index 0000000..081b62a --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s @@ -0,0 +1,1973 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_cmp_class_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 
vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + 
+v_cmp_f_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid 
operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, vcc_hi, v255 
+// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + 
+v_cmp_le_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: 
invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, vcc_hi, 
v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction 
+ +v_cmp_nge_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, vcc_lo, v255 
+// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + 
+v_cmp_t_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v127, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 +// GFX11: error: invalid 
operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + 
+v_cmp_le_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, 
v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v128, v2 +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] 
+// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 
quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + 
+v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid 
operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v1, v255 
quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for 
instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: 
error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// 
GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 
quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 
vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand 
for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + 
+v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 
vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX11: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + 
+v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_t_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s new file mode 100644 index 0000000..1e6754b --- /dev/null +++ 
b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s @@ -0,0 +1,1973 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s + +v_cmp_class_f16 vcc, v1, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, v127, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v1, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, v127, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v1, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, v127, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v1, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, v127, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v127, 
v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v1, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, v127, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v1, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, v127, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v1, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, v127, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v1, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, v127, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v127, v255 +// GFX11: 
v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v1, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, v127, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v1, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, v127, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v1, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, v127, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v1, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, v127, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v127, v255 +// GFX11: 
v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v1, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, v127, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v1, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, v127, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v1, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, v127, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v1, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, v127, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v127, v255 +// GFX11: 
v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v1, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, v127, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v1, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, v127, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v127, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v1, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, v127, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v1, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v127, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v1, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, v127, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v1, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v127, v255 +// GFX11: 
v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v1, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, v127, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v1, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, v127, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v1, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, v127, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v1, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, v127, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nle_f16_e64 + 
+v_cmp_nle_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v1, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, v127, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v1, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, v127, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v1, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, v127, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v1, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_t_f16_e64 + 
+v_cmp_t_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v1, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v1, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, v127, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v1, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v127, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, vcc_hi, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, vcc_lo, v255 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v128, v2 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v128, v2 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v128, v2 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v128, v2 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v128, v2 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v128, v2 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v128, v2 +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v128, v2 +// GFX11: 
v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v128, v2 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v128, v2 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v128, v2 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v128, v2 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v128, v2 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v128, v2 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v128, v2 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v128, v2 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v128, v2 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v128, v2 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v128, v2 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v128, v2 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v128, v2 +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v128, v2 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v128, v2 +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v128, v2 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v128, v2 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v128, v2 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v128, v2 +// GFX11: v_cmp_nle_f16_e64 + 
+v_cmp_nle_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v128, v2 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v128, v2 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v128, v2 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v128, v2 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v128, v2 +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + 
+v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 
+ +v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: 
v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v1, 
v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + 
+v_cmp_tru_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + 
+v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + 
+v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v1, 
v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 
vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64 + +v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64 + +v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64 + +v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_eq_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64 + +v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64 + +v_cmp_f_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
v_cmp_f_f16_e64 + +v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64 + +v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64 + +v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_ge_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64 + +v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64 + +v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64 + +v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_gt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64 + +v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64 + +v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64 + +v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_le_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64 + +v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64 + +v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64 + +v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + +v_cmp_lt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64 + 
+v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_lt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64 + +v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64 + +v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_ne_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64 + +v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64 + +v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64 + +v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_ngt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64 + +v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64 + +v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64 + +v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64 + +v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64 + +v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_t_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + +v_cmp_tru_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64 + 
+v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + +v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64 + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s new file mode 100644 index 0000000..b8459c8 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s @@ -0,0 +1,542 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s + +v_cmpx_class_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_f_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lg_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_f16_e32 v1, v255 +// GFX11: error: operands are 
not valid for this GPU or mode + +v_cmpx_lt_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_i16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_u16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_neq_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nge_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ngt_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nle_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlg_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlt_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_o_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_t_f16_e32 v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1, v255 +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v1, v255 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_class_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_eq_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_f_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ge_u16_e32 v255, v2 +// GFX11: error: 
operands are not valid for this GPU or mode + +v_cmpx_gt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_gt_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_le_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lg_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_lt_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_i16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ne_u16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_neq_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nge_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_ngt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nle_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlg_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_nlt_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_o_f16_e32 v255, v2 +// GFX11: error: operands are not valid for this GPU or mode + +v_cmpx_t_f16_e32 v255, v2 +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255, v2 +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v255, v2 +// GFX11: 
error: operands are not valid for this GPU or mode + +v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v1, v255 
quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_t_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 
v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_t_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + 
+v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_t_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_f_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
error: invalid operand for instruction + +v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for 
instruction + +v_cmpx_t_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: error: invalid operand for instruction + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s new file mode 100644 index 0000000..074fd10 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s @@ -0,0 +1,542 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s + +v_cmpx_class_f16 v1, v255 +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v1, v255 +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v1, v255 +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v1, v255 +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v1, v255 +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v1, v255 +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v1, v255 +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v1, v255 +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v1, v255 +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v1, v255 +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v1, v255 +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v1, v255 +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v1, v255 +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v1, v255 +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v1, v255 +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v1, v255 +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v1, v255 +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v1, v255 +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v1, v255 +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v1, v255 +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v1, v255 +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v1, v255 +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v1, v255 +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v1, v255 
+// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v1, v255 +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v1, v255 +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v1, v255 +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v1, v255 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v1, v255 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v1, v255 +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v255, v2 +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v255, v2 +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v255, v2 +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v255, v2 +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v255, v2 +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v255, v2 +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v255, v2 +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v255, v2 +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v255, v2 +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v255, v2 +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v255, v2 +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v255, v2 +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v255, v2 +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v255, v2 +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v255, v2 +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v255, v2 +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v255, v2 +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v255, v2 +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v255, v2 +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v255, v2 +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v255, v2 +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v255, v2 +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v255, v2 +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v255, v2 +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v255, v2 +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v255, v2 +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v255, v2 +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v255, v2 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v255, v2 +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v255, v2 +// 
GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + 
+v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0] 
+// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 + +v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16_e64 + +v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64 + +v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64 + +v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64 + +v_cmpx_f_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_f_f16_e64 + +v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64 + +v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64 + +v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64 + +v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64 + +v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64 + +v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64 + +v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_f16_e64 + +v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_i16_e64 + +v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
v_cmpx_le_u16_e64 + +v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64 + +v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64 + +v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64 + +v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64 + +v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64 + +v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64 + +v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64 + +v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64 + +v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64 + +v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64 + +v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64 + +v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64 + +v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_o_f16_e64 + +v_cmpx_t_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_tru_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64 + +v_cmpx_u_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_u_f16_e64 +