From 355103f6c0f869028f3739cea663dddaaa08da48 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Fri, 23 Sep 2016 09:08:07 +0000 Subject: [PATCH] [AMDGPU] Refactor VOP1 and VOP2 instruction TD definitions Differential revision: https://reviews.llvm.org/D24738 llvm-svn: 282234 --- llvm/lib/Target/AMDGPU/CIInstructions.td | 34 +- llvm/lib/Target/AMDGPU/SIInstrFormats.td | 166 ------ llvm/lib/Target/AMDGPU/SIInstrInfo.td | 782 ++--------------------------- llvm/lib/Target/AMDGPU/SIInstructions.td | 422 ---------------- llvm/lib/Target/AMDGPU/VIInstrFormats.td | 157 ------ llvm/lib/Target/AMDGPU/VIInstructions.td | 106 ---- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 539 ++++++++++++++++++++ llvm/lib/Target/AMDGPU/VOP2Instructions.td | 608 ++++++++++++++++++++++ llvm/lib/Target/AMDGPU/VOP3Instructions.td | 30 +- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 50 +- llvm/lib/Target/AMDGPU/VOPInstructions.td | 176 ++++++- 11 files changed, 1379 insertions(+), 1691 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/VOP1Instructions.td create mode 100644 llvm/lib/Target/AMDGPU/VOP2Instructions.td diff --git a/llvm/lib/Target/AMDGPU/CIInstructions.td b/llvm/lib/Target/AMDGPU/CIInstructions.td index 35bf31d..26a483a 100644 --- a/llvm/lib/Target/AMDGPU/CIInstructions.td +++ b/llvm/lib/Target/AMDGPU/CIInstructions.td @@ -12,36 +12,4 @@ // S_CBRANCH_CDBGUSER // S_CBRANCH_CDBGSYS // S_CBRANCH_CDBGSYS_OR_USER -// S_CBRANCH_CDBGSYS_AND_USER - -//===----------------------------------------------------------------------===// -// VOP1 Instructions -//===----------------------------------------------------------------------===// - -let SubtargetPredicate = isCIVI in { - -let SchedRW = [WriteDoubleAdd] in { -defm V_TRUNC_F64 : VOP1Inst , "v_trunc_f64", - VOP_F64_F64, ftrunc ->; -defm V_CEIL_F64 : VOP1Inst , "v_ceil_f64", - VOP_F64_F64, fceil ->; -defm V_FLOOR_F64 : VOP1Inst , "v_floor_f64", - VOP_F64_F64, ffloor ->; -defm V_RNDNE_F64 : VOP1Inst , "v_rndne_f64", - VOP_F64_F64, frint ->; -} // End SchedRW = [WriteDoubleAdd] - -let SchedRW = [WriteQuarterRate32] in { -defm V_LOG_LEGACY_F32 : VOP1Inst , "v_log_legacy_f32", - VOP_F32_F32 ->; -defm V_EXP_LEGACY_F32 : VOP1Inst , "v_exp_legacy_f32", - VOP_F32_F32 ->; -} // End SchedRW = [WriteQuarterRate32] - -} // End SubtargetPredicate = isCIVI +// S_CBRANCH_CDBGSYS_AND_USER \ No newline at end of file diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index f1599bc..91e7065 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -145,161 +145,6 @@ class Enc64 { class VOPDstOperand : RegisterOperand ; -let Uses = [EXEC] in { - -class VOPAnyCommon pattern> : - InstSI { - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let VALU = 1; -} - -class VOP1Common pattern> : - VOPAnyCommon { - - let VOP1 = 1; - let Size = 4; -} - -class VOP2Common pattern> : - VOPAnyCommon { - - let VOP2 = 1; - let Size = 4; -} - -class VOP3Common pattern = [], bit HasMods = 0, - bit VOP3Only = 0> : - VOPAnyCommon { - - // Using complex patterns gives VOP3 patterns a very high complexity rating, - // but standalone patterns are almost always prefered, so we need to adjust the - // priority lower. The goal is to use a high number to reduce complexity to - // zero (or less than zero). 
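A minimal sketch of the effect, not part of the patch: TableGen orders ISel patterns by complexity, and AddedComplexity is added to the computed score, so the -1000 below drives the VOP3 patterns' priority to zero or less. A plain standalone pattern then wins whenever both match:

// Illustrative standalone pattern; the real ones come from the VOP1/VOP2
// multiclasses. It outranks the equivalent VOP3 pattern once the VOP3
// complexity has been pushed negative.
def : Pat <
  (fadd f32:$src0, f32:$src1),
  (V_ADD_F32_e32 $src0, $src1)
>;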
- let AddedComplexity = -1000; - - let VOP3 = 1; - let VALU = 1; - - let AsmMatchConverter = - !if(!eq(VOP3Only,1), - "cvtVOP3", - !if(!eq(HasMods,1), "cvtVOP3_2_mod", "")); - - let AsmVariantName = AMDGPUAsmVariants.VOP3; - - let isCodeGenOnly = 0; - - int Size = 8; - - // Because SGPRs may be allowed if there are multiple operands, we - // need a post-isel hook to insert copies in order to avoid - // violating constant bus requirements. - let hasPostISelHook = 1; -} - -} // End Uses = [EXEC] - -//===----------------------------------------------------------------------===// -// Vector ALU operations -//===----------------------------------------------------------------------===// - -class VOP1e op> : Enc32 { - bits<8> vdst; - bits<9> src0; - - let Inst{8-0} = src0; - let Inst{16-9} = op; - let Inst{24-17} = vdst; - let Inst{31-25} = 0x3f; //encoding -} - -class VOP2e op> : Enc32 { - bits<8> vdst; - bits<9> src0; - bits<8> src1; - - let Inst{8-0} = src0; - let Inst{16-9} = src1; - let Inst{24-17} = vdst; - let Inst{30-25} = op; - let Inst{31} = 0x0; //encoding -} - -class VOP2_MADKe op> : Enc64 { - - bits<8> vdst; - bits<9> src0; - bits<8> src1; - bits<32> imm; - - let Inst{8-0} = src0; - let Inst{16-9} = src1; - let Inst{24-17} = vdst; - let Inst{30-25} = op; - let Inst{31} = 0x0; // encoding - let Inst{63-32} = imm; -} - -class VOP3a op> : Enc64 { - bits<2> src0_modifiers; - bits<9> src0; - bits<2> src1_modifiers; - bits<9> src1; - bits<2> src2_modifiers; - bits<9> src2; - bits<1> clamp; - bits<2> omod; - - let Inst{8} = src0_modifiers{1}; - let Inst{9} = src1_modifiers{1}; - let Inst{10} = src2_modifiers{1}; - let Inst{11} = clamp; - let Inst{25-17} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = src0; - let Inst{49-41} = src1; - let Inst{58-50} = src2; - let Inst{60-59} = omod; - let Inst{61} = src0_modifiers{0}; - let Inst{62} = src1_modifiers{0}; - let Inst{63} = src2_modifiers{0}; -} - -class VOP3e op> : VOP3a { - bits<8> vdst; - - let Inst{7-0} = vdst; -} - -class VOP3be op> : Enc64 { - bits<8> vdst; - bits<2> src0_modifiers; - bits<9> src0; - bits<2> src1_modifiers; - bits<9> src1; - bits<2> src2_modifiers; - bits<9> src2; - bits<7> sdst; - bits<2> omod; - - let Inst{7-0} = vdst; - let Inst{14-8} = sdst; - let Inst{25-17} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = src0; - let Inst{49-41} = src1; - let Inst{58-50} = src2; - let Inst{60-59} = omod; - let Inst{61} = src0_modifiers{0}; - let Inst{62} = src1_modifiers{0}; - let Inst{63} = src2_modifiers{0}; -} - class VINTRPe op> : Enc32 { bits<8> vdst; bits<8> vsrc; @@ -369,17 +214,6 @@ class EXPe : Enc64 { let Uses = [EXEC] in { -class VOP1 op, dag outs, dag ins, string asm, list pattern> : - VOP1Common , - VOP1e { - let isCodeGenOnly = 0; -} - -class VOP2 op, dag outs, dag ins, string asm, list pattern> : - VOP2Common , VOP2e { - let isCodeGenOnly = 0; -} - class VINTRPCommon pattern> : InstSI { let mayLoad = 1; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 24e6f86..eb20e0d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -14,38 +14,6 @@ def isCIOnly : Predicate<"Subtarget->getGeneration() ==" def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; -class vop { - field bits<9> SI3; - field bits<10> VI3; -} - -class vop1 si, bits<8> vi = si> : vop { - field bits<8> SI = si; - field bits<8> VI = vi; - - field bits<9> SI3 = {1, 1, si{6-0}}; - field bits<10> VI3 = !add(0x140, vi); 
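Worked example, illustrative only: the helpers being removed here encode the SI to VI opcode remapping in one place. For v_mov_b32 the instantiation would be:

// Hypothetical standalone instantiation; normally the record is passed
// straight into VOP1Inst. SI = VI = 0x01,
// SI3 = {1, 1, 0000001} = 0x181, VI3 = 0x140 + 0x1 = 0x141.
def mov_b32_ops : vop1<0x1>;

The new scheme drops these mapping records and writes the same encodings out explicitly, e.g. the {1, 1, op{6-0}} VOP3 opcode in VOP1_Real_si later in this patch.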
-} - -class vop2 si, bits<6> vi = si> : vop { - field bits<6> SI = si; - field bits<6> VI = vi; - - field bits<9> SI3 = {1, 0, 0, si{5-0}}; - field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}}; -} - -// Specify a VOP2 opcode for SI and VOP3 opcode for VI -// that doesn't have VOP2 encoding on VI -class vop23 si, bits<10> vi> : vop2 { - let VI3 = vi; -} - -class vop3 si, bits<10> vi = {0, si}> : vop { - let SI3 = si; - let VI3 = vi; -} - // Execpt for the NONE field, this must be kept in sync with the // SIEncodingFamily enum in AMDGPUInstrInfo.cpp def SIEncodingFamily { @@ -639,18 +607,20 @@ class getVOP3SrcForVT { // Returns 1 if the source arguments have modifiers, 0 if they do not. // XXX - do f16 instructions? -class hasModifiers { +class isFloatType { bit ret = + !if(!eq(SrcVT.Value, f16.Value), 1, !if(!eq(SrcVT.Value, f32.Value), 1, !if(!eq(SrcVT.Value, f64.Value), 1, - 0)); + 0))); } -class hasIntModifiers { +class isIntType { bit ret = + !if(!eq(SrcVT.Value, i16.Value), 1, !if(!eq(SrcVT.Value, i32.Value), 1, !if(!eq(SrcVT.Value, i64.Value), 1, - 0)); + 0))); } @@ -756,39 +726,21 @@ class getInsSDWA { + bit ret = !if(a, 1, !if(b, 1, 0)); +} + +class BitAnd { + bit ret = !if(a, !if(b, 1, 0), 0); +} + class VOPProfile _ArgVT> { field list ArgVT = _ArgVT; @@ -918,19 +878,27 @@ class VOPProfile _ArgVT> { field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); field bit HasDst32 = HasDst; + field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case field int NumSrcArgs = getNumSrcArgs.ret; field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1); field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1); field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1); - field bit HasSrc0Mods = hasModifiers.ret; - field bit HasSrc1Mods = hasModifiers.ret; - field bit HasSrc2Mods = hasModifiers.ret; - field bit HasSrc0IntMods = hasIntModifiers.ret; - field bit HasSrc1IntMods = hasIntModifiers.ret; - field bit HasSrc2IntMods = hasIntModifiers.ret; + // TODO: Modifiers logic is somewhat adhoc here, to be refined later + field bit HasModifiers = isFloatType.ret; + + field bit HasSrc0FloatMods = isFloatType.ret; + field bit HasSrc1FloatMods = isFloatType.ret; + field bit HasSrc2FloatMods = isFloatType.ret; + + field bit HasSrc0IntMods = isIntType.ret; + field bit HasSrc1IntMods = isIntType.ret; + field bit HasSrc2IntMods = isIntType.ret; + + field bit HasSrc0Mods = HasModifiers; + field bit HasSrc1Mods = !if(HasModifiers, BitOr.ret, 0); + field bit HasSrc2Mods = !if(HasModifiers, BitOr.ret, 0); - field bit HasModifiers = HasSrc0Mods; field bit HasOMod = HasModifiers; field bit HasClamp = HasModifiers; field bit HasSDWAClamp = HasSrc0; @@ -997,115 +965,11 @@ def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; -// Restrict src0 to be VGPR -def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { - let Src0RC32 = VRegSrc_32; - let Src0RC64 = VRegSrc_32; - - let HasExt = 0; -} - -// Special case because there are no true output operands. Hack vdst -// to be a src operand. The custom inserter must add a tied implicit -// def and use of the super register since there seems to be no way to -// add an implicit def of a virtual register in tablegen. 
-def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { - let Src0RC32 = VOPDstOperand; - let Src0RC64 = VOPDstOperand; - - let Outs = (outs); - let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0); - let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); - - let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, - bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel); - - let Asm32 = getAsm32<1, 1>.ret; - let Asm64 = getAsm64<1, 1, 0>.ret; - let AsmDPP = getAsmDPP<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; - - let HasExt = 0; - let HasDst = 0; -} - -// Write out to vcc or arbitrary SGPR. -def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { - let Asm32 = "$vdst, vcc, $src0, $src1"; - let Asm64 = "$vdst, $sdst, $src0, $src1"; - let Outs32 = (outs DstRC:$vdst); - let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); -} - -// Write out to vcc or arbitrary SGPR and read in from vcc or -// arbitrary SGPR. -def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { - // We use VCSrc_b32 to exclude literal constants, even though the - // encoding normally allows them since the implicit VCC use means - // using one would always violate the constant bus - // restriction. SGPRs are still allowed because it should - // technically be possible to use VCC again as src0. - let Src0RC32 = VCSrc_b32; - let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; - let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; - let Outs32 = (outs DstRC:$vdst); - let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); - - // Suppress src2 implied by type since the 32-bit encoding uses an - // implicit VCC use. - let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); -} - -// Read in from vcc or arbitrary SGPR -def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { - let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above. - let Asm32 = "$vdst, $src0, $src1, vcc"; - let Asm64 = "$vdst, $src0, $src1, $src2"; - let Outs32 = (outs DstRC:$vdst); - let Outs64 = (outs DstRC:$vdst); - - // Suppress src2 implied by type since the 32-bit encoding uses an - // implicit VCC use. 
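Concrete illustration of the constant bus point, not from the patch: in the e32 form, something like v_cndmask_b32 v0, 0x1234, v1, vcc would need the constant bus twice, once for the literal and once for the implicit VCC read, exceeding the single read these targets allow, so VCSrc_b32 simply excludes literals up front.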
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); -} - def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; -def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> { - field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm); - field string Asm32 = "$vdst, $src0, $src1, $imm"; - field bit HasExt = 0; -} -def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> { - field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1); - field string Asm32 = "$vdst, $src0, $imm, $src1"; - field bit HasExt = 0; -} -def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { - let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); - let Ins64 = getIns64, 3, - HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret; - let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0, - FP32InputMods:$src1_modifiers, Src1RC32:$src1, - VGPR_32:$src2, // stub argument - dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, - bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins FP32InputMods:$src0_fmodifiers, Src0RC32:$src0, - FP32InputMods:$src1_fmodifiers, Src1RC32:$src1, - VGPR_32:$src2, // stub argument - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel); - let Asm32 = getAsm32<1, 2, f32>.ret; - let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret; - let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret; - let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret; -} def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; @@ -1113,10 +977,6 @@ def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>; -class VOP { - string OpName = opName; -} - class Commutable_REV { string RevOp = revOp; bit IsOrig = isOrig; @@ -1127,556 +987,6 @@ class AtomicNoRet { bit IsRet = isRet; } -class VOP1_Pseudo pattern, string opName> : - VOP1Common , - VOP , - SIMCInstr , - MnemonicAlias { - let isPseudo = 1; - let isCodeGenOnly = 1; - - field bits<8> vdst; - field bits<9> src0; -} - -class VOP1_Real_si : - VOP1, - SIMCInstr { - let AssemblerPredicate = SIAssemblerPredicate; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class VOP1_Real_vi : - VOP1, - SIMCInstr { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass VOP1_m pattern, - string asm = opName#p.Asm32> { - def "" : VOP1_Pseudo ; - - def _si : VOP1_Real_si ; - - def _vi : VOP1_Real_vi ; - -} - -class VOP1_DPP : - VOP1_DPPe , - VOP_DPP { - let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); - let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "DPP"; - let DisableDecoder = DisableVIDecoder; - let src0_modifiers = !if(p.HasModifiers, ?, 0); - let src1_modifiers = 0; -} - -class SDWADisableFields { - bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?); - bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?); - bits<2> src0_fmodifiers = !if(!eq(p.NumSrcArgs, 0), - 0, - !if(p.HasModifiers, ?, 0)); - bits<1> src0_imodifiers = !if(!eq(p.NumSrcArgs, 0), - 0, - !if(p.HasModifiers, 0, ?)); - bits<3> src1_sel = 
!if(!eq(p.NumSrcArgs, 0), 6, - !if(!eq(p.NumSrcArgs, 1), 6, - ?)); - bits<2> src1_fmodifiers = !if(!eq(p.NumSrcArgs, 0), 0, - !if(!eq(p.NumSrcArgs, 1), 0, - !if(p.HasModifiers, ?, 0))); - bits<1> src1_imodifiers = !if(!eq(p.NumSrcArgs, 0), 0, - !if(!eq(p.NumSrcArgs, 1), 0, - !if(p.HasModifiers, 0, ?))); - bits<3> dst_sel = !if(p.HasDst, ?, 6); - bits<2> dst_unused = !if(p.HasDst, ?, 2); - bits<1> clamp = !if(!eq(p.NumSrcArgs, 0), 0, ?); -} - -class VOP1_SDWA : - VOP1_SDWAe , - VOP_SDWA , - SDWADisableFields
{ - let AsmMatchConverter = "cvtSdwaVOP1"; - let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); - let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "SDWA"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass VOP1SI_m pattern, - string asm = opName#p.Asm32> { - - def "" : VOP1_Pseudo ; - - def _si : VOP1_Real_si ; -} - -class VOP2_Pseudo pattern, string opName> : - VOP2Common , - VOP , - SIMCInstr, - MnemonicAlias { - let isPseudo = 1; - let isCodeGenOnly = 1; -} - -class VOP2_Real_si : - VOP2 , - SIMCInstr { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class VOP2_Real_vi : - VOP2 , - SIMCInstr { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass VOP2SI_m pattern, - string revOp> { - - def "" : VOP2_Pseudo , - Commutable_REV; - - def _si : VOP2_Real_si ; -} - -multiclass VOP2_m pattern, - string revOp> { - - def "" : VOP2_Pseudo , - Commutable_REV; - - def _si : VOP2_Real_si ; - - def _vi : VOP2_Real_vi ; - -} - -class VOP2_DPP : - VOP2_DPPe , - VOP_DPP { - let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); - let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "DPP"; - let DisableDecoder = DisableVIDecoder; - let src0_modifiers = !if(p.HasModifiers, ?, 0); - let src1_modifiers = !if(p.HasModifiers, ?, 0); -} - -class VOP2_SDWA : - VOP2_SDWAe , - VOP_SDWA , - SDWADisableFields
{ - let AsmMatchConverter = "cvtSdwaVOP2"; - let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); - let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "SDWA"; - let DisableDecoder = DisableVIDecoder; -} - -class VOP3DisableFields { - - bits<2> src0_modifiers = !if(HasModifiers, ?, 0); - bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0); - bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0); - bits<2> omod = !if(HasModifiers, ?, 0); - bits<1> clamp = !if(HasModifiers, ?, 0); - bits<9> src1 = !if(HasSrc1, ?, 0); - bits<9> src2 = !if(HasSrc2, ?, 0); -} - -class VOP3DisableModFields { - bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0); - bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0); - bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0); - bits<2> omod = !if(HasOutputMods, ?, 0); - bits<1> clamp = !if(HasOutputMods, ?, 0); -} - -class VOP3_Pseudo pattern, string opName, - bit HasMods = 0, bit VOP3Only = 0> : - VOP3Common , - VOP , - SIMCInstr, - MnemonicAlias { - let isPseudo = 1; - let isCodeGenOnly = 1; - - field bit vdst; - field bit src0; -} - -class VOP3_Real_si op, dag outs, dag ins, string asm, string opName, - bit HasMods = 0, bit VOP3Only = 0> : - VOP3Common , - VOP3e , - SIMCInstr { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class VOP3_Real_vi op, dag outs, dag ins, string asm, string opName, - bit HasMods = 0, bit VOP3Only = 0> : - VOP3Common , - VOP3e_vi , - SIMCInstr { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; -} - -class VOP3b_Real_si op, dag outs, dag ins, string asm, string opName, - bit HasMods = 0, bit VOP3Only = 0> : - VOP3Common , - VOP3be , - SIMCInstr { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class VOP3b_Real_vi op, dag outs, dag ins, string asm, string opName, - bit HasMods = 0, bit VOP3Only = 0> : - VOP3Common , - VOP3be_vi , - SIMCInstr { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; -} - -class VOP3e_Real_si op, dag outs, dag ins, string asm, string opName, - bit HasMods = 0, bit VOP3Only = 0> : - VOP3Common , - VOP3e , - SIMCInstr { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class VOP3e_Real_vi op, dag outs, dag ins, string asm, string opName, - bit HasMods = 0, bit VOP3Only = 0> : - VOP3Common , - VOP3e_vi , - SIMCInstr { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass VOP3_1_m pattern, string opName, bit HasMods = 1> { - - def "" : VOP3_Pseudo ; - - def _si : VOP3_Real_si , - VOP3DisableFields<0, 0, HasMods>; - - def _vi : VOP3_Real_vi , - VOP3DisableFields<0, 0, HasMods>; -} - -multiclass VOP3SI_1_m pattern, string opName, bit HasMods = 1> { - - def "" : VOP3_Pseudo ; - - def _si : VOP3_Real_si , - VOP3DisableFields<0, 0, HasMods>; - // No VI instruction. This class is for SI only. 
-} - -multiclass VOP3_2_m pattern, string opName, string revOp, - bit HasMods = 1> { - - def "" : VOP3_Pseudo , - Commutable_REV; - - def _si : VOP3_Real_si , - VOP3DisableFields<1, 0, HasMods>; - - def _vi : VOP3_Real_vi , - VOP3DisableFields<1, 0, HasMods>; -} - -multiclass VOP3SI_2_m pattern, string opName, string revOp, - bit HasMods = 1> { - - def "" : VOP3_Pseudo , - Commutable_REV; - - def _si : VOP3_Real_si , - VOP3DisableFields<1, 0, HasMods>; - - // No VI instruction. This class is for SI only. -} - -// Two operand VOP3b instruction that may have a 3rd SGPR bool operand -// instead of an implicit VCC as in the VOP2b format. -multiclass VOP3b_2_3_m pattern, string opName, string revOp, - bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> { - def "" : VOP3_Pseudo ; - - def _si : VOP3b_Real_si , - VOP3DisableFields<1, useSrc2Input, HasMods>; - - def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, useSrc2Input, HasMods>; -} - -// Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32. -multiclass VOP3e_2_3_m pattern, string opName, string revOp, - bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> { - def "" : VOP3_Pseudo ; - - def _si : VOP3e_Real_si , - VOP3DisableFields<1, useSrc2Input, HasMods>; - - def _vi : VOP3e_Real_vi , - VOP3DisableFields<1, useSrc2Input, HasMods>; -} - - -// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers. -multiclass VOP2SI_3VI_m pattern = []> { - let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : VOPAnyCommon , - SIMCInstr; - } - - def _si : VOP2 , - SIMCInstr { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; - } - - def _vi : VOP3Common , - VOP3e_vi , - VOP3DisableFields <1, 0, 0>, - SIMCInstr { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; - } -} - -multiclass VOP1_Helper pat32, - list pat64> { - - defm _e32 : VOP1_m ; - - defm _e64 : VOP3_1_m ; - - def _dpp : VOP1_DPP ; - - def _sdwa : VOP1_SDWA ; -} - -multiclass VOP1Inst : VOP1_Helper < - op, opName, P, [], - !if(P.HasModifiers, - [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, - i32:$src0_modifiers, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]) ->; - -multiclass VOP1InstSI { - - defm _e32 : VOP1SI_m ; - - defm _e64 : VOP3SI_1_m ; -} - -multiclass VOP2_Helper pat32, - list pat64, string revOp> { - - defm _e32 : VOP2_m ; - - defm _e64 : VOP3_2_m ; - - def _dpp : VOP2_DPP ; - - def _sdwa : VOP2_SDWA ; -} - -multiclass VOP2Inst : VOP2_Helper < - op, opName, P, [], - !if(P.HasModifiers, - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp ->; - -multiclass VOP2InstSI { - - defm _e32 : VOP2SI_m ; - - defm _e64 : VOP3SI_2_m ; -} - -multiclass VOP2e_Helper pat32, list pat64, - string revOp, bit useSGPRInput> { - - let SchedRW = [Write32Bit] in { - let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { - defm _e32 : VOP2_m ; - } - - defm _e64 : VOP3e_2_3_m ; - } -} - -multiclass VOP2eInst : VOP2e_Helper < - op, opName, P, [], - !if(P.HasModifiers, - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, 
!eq(P.NumSrcArgs, 3) ->; - -multiclass VOP2b_Helper pat32, list pat64, - string revOp, bit useSGPRInput> { - - let SchedRW = [Write32Bit, WriteSALU] in { - let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { - defm _e32 : VOP2_m ; - } - - defm _e64 : VOP3b_2_3_m ; - } -} - -multiclass VOP2bInst : VOP2b_Helper < - op, opName, P, [], - !if(P.HasModifiers, - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, !eq(P.NumSrcArgs, 3) ->; - -// A VOP2 instruction that is VOP3-only on VI. -multiclass VOP2_VI3_Helper pat32, list pat64, string revOp> { - - defm _e32 : VOP2SI_m ; - - defm _e64 : VOP3_2_m ; -} - -multiclass VOP2_VI3_Inst - : VOP2_VI3_Helper < - op, opName, P, [], - !if(P.HasModifiers, - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp ->; - -multiclass VOP2MADK pattern = []> { - - def "" : VOP2_Pseudo ; - -let isCodeGenOnly = 0 in { - def _si : VOP2Common , - SIMCInstr , - VOP2_MADKe { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; - } - - def _vi : VOP2Common , - SIMCInstr , - VOP2_MADKe { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; - } -} // End isCodeGenOnly = 0 -} - -class Vop3ModPat : Pat< - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))), - (Inst i32:$src0_modifiers, P.Src0VT:$src0, - i32:$src1_modifiers, P.Src1VT:$src1, - i32:$src2_modifiers, P.Src2VT:$src2, - i1:$clamp, - i32:$omod)>; - //===----------------------------------------------------------------------===// // Interpolation opcodes //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index de6c2bd..60c65e9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -36,236 +36,6 @@ let SubtargetPredicate = isGCN in { defm EXP : EXP_m; //===----------------------------------------------------------------------===// -// VOP1 Instructions -//===----------------------------------------------------------------------===// - -let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { -defm V_NOP : VOP1Inst , "v_nop", VOP_NONE>; -} - -let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { -defm V_MOV_B32 : VOP1Inst , "v_mov_b32", VOP_I32_I32>; -} // End isMoveImm = 1 - -let Uses = [EXEC] in { - -// FIXME: Specify SchedRW for READFIRSTLANE_B32 - -def V_READFIRSTLANE_B32 : VOP1 < - 0x00000002, - (outs SReg_32:$vdst), - (ins VGPR_32:$src0), - "v_readfirstlane_b32 $vdst, $src0", - [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))] -> { - let isConvergent = 1; -} - -} - -let SchedRW = [WriteQuarterRate32] in { - -defm V_CVT_I32_F64 : VOP1Inst , "v_cvt_i32_f64", - VOP_I32_F64, fp_to_sint ->; -defm V_CVT_F64_I32 : VOP1Inst , "v_cvt_f64_i32", - VOP_F64_I32, sint_to_fp ->; -defm V_CVT_F32_I32 : VOP1Inst , "v_cvt_f32_i32", - VOP_F32_I32, sint_to_fp ->; -defm 
V_CVT_F32_U32 : VOP1Inst , "v_cvt_f32_u32", - VOP_F32_I32, uint_to_fp ->; -defm V_CVT_U32_F32 : VOP1Inst , "v_cvt_u32_f32", - VOP_I32_F32, fp_to_uint ->; -defm V_CVT_I32_F32 : VOP1Inst , "v_cvt_i32_f32", - VOP_I32_F32, fp_to_sint ->; -defm V_CVT_F16_F32 : VOP1Inst , "v_cvt_f16_f32", - VOP_I32_F32, fp_to_f16 ->; -defm V_CVT_F32_F16 : VOP1Inst , "v_cvt_f32_f16", - VOP_F32_I32, f16_to_fp ->; -defm V_CVT_RPI_I32_F32 : VOP1Inst , "v_cvt_rpi_i32_f32", - VOP_I32_F32, cvt_rpi_i32_f32>; -defm V_CVT_FLR_I32_F32 : VOP1Inst , "v_cvt_flr_i32_f32", - VOP_I32_F32, cvt_flr_i32_f32>; -defm V_CVT_OFF_F32_I4 : VOP1Inst , "v_cvt_off_f32_i4", VOP_F32_I32>; -defm V_CVT_F32_F64 : VOP1Inst , "v_cvt_f32_f64", - VOP_F32_F64, fpround ->; -defm V_CVT_F64_F32 : VOP1Inst , "v_cvt_f64_f32", - VOP_F64_F32, fpextend ->; -defm V_CVT_F32_UBYTE0 : VOP1Inst , "v_cvt_f32_ubyte0", - VOP_F32_I32, AMDGPUcvt_f32_ubyte0 ->; -defm V_CVT_F32_UBYTE1 : VOP1Inst , "v_cvt_f32_ubyte1", - VOP_F32_I32, AMDGPUcvt_f32_ubyte1 ->; -defm V_CVT_F32_UBYTE2 : VOP1Inst , "v_cvt_f32_ubyte2", - VOP_F32_I32, AMDGPUcvt_f32_ubyte2 ->; -defm V_CVT_F32_UBYTE3 : VOP1Inst , "v_cvt_f32_ubyte3", - VOP_F32_I32, AMDGPUcvt_f32_ubyte3 ->; -defm V_CVT_U32_F64 : VOP1Inst , "v_cvt_u32_f64", - VOP_I32_F64, fp_to_uint ->; -defm V_CVT_F64_U32 : VOP1Inst , "v_cvt_f64_u32", - VOP_F64_I32, uint_to_fp ->; - -} // End SchedRW = [WriteQuarterRate32] - -defm V_FRACT_F32 : VOP1Inst , "v_fract_f32", - VOP_F32_F32, AMDGPUfract ->; -defm V_TRUNC_F32 : VOP1Inst , "v_trunc_f32", - VOP_F32_F32, ftrunc ->; -defm V_CEIL_F32 : VOP1Inst , "v_ceil_f32", - VOP_F32_F32, fceil ->; -defm V_RNDNE_F32 : VOP1Inst , "v_rndne_f32", - VOP_F32_F32, frint ->; -defm V_FLOOR_F32 : VOP1Inst , "v_floor_f32", - VOP_F32_F32, ffloor ->; -defm V_EXP_F32 : VOP1Inst , "v_exp_f32", - VOP_F32_F32, fexp2 ->; - -let SchedRW = [WriteQuarterRate32] in { - -defm V_LOG_F32 : VOP1Inst , "v_log_f32", - VOP_F32_F32, flog2 ->; -defm V_RCP_F32 : VOP1Inst , "v_rcp_f32", - VOP_F32_F32, AMDGPUrcp ->; -defm V_RCP_IFLAG_F32 : VOP1Inst , "v_rcp_iflag_f32", - VOP_F32_F32 ->; -defm V_RSQ_F32 : VOP1Inst , "v_rsq_f32", - VOP_F32_F32, AMDGPUrsq ->; - -} // End SchedRW = [WriteQuarterRate32] - -let SchedRW = [WriteDouble] in { - -defm V_RCP_F64 : VOP1Inst , "v_rcp_f64", - VOP_F64_F64, AMDGPUrcp ->; -defm V_RSQ_F64 : VOP1Inst , "v_rsq_f64", - VOP_F64_F64, AMDGPUrsq ->; - -} // End SchedRW = [WriteDouble]; - -defm V_SQRT_F32 : VOP1Inst , "v_sqrt_f32", - VOP_F32_F32, fsqrt ->; - -let SchedRW = [WriteDouble] in { - -defm V_SQRT_F64 : VOP1Inst , "v_sqrt_f64", - VOP_F64_F64, fsqrt ->; - -} // End SchedRW = [WriteDouble] - -let SchedRW = [WriteQuarterRate32] in { - -defm V_SIN_F32 : VOP1Inst , "v_sin_f32", - VOP_F32_F32, AMDGPUsin ->; -defm V_COS_F32 : VOP1Inst , "v_cos_f32", - VOP_F32_F32, AMDGPUcos ->; - -} // End SchedRW = [WriteQuarterRate32] - -defm V_NOT_B32 : VOP1Inst , "v_not_b32", VOP_I32_I32>; -defm V_BFREV_B32 : VOP1Inst , "v_bfrev_b32", VOP_I32_I32>; -defm V_FFBH_U32 : VOP1Inst , "v_ffbh_u32", VOP_I32_I32>; -defm V_FFBL_B32 : VOP1Inst , "v_ffbl_b32", VOP_I32_I32>; -defm V_FFBH_I32 : VOP1Inst , "v_ffbh_i32", VOP_I32_I32>; -defm V_FREXP_EXP_I32_F64 : VOP1Inst , "v_frexp_exp_i32_f64", - VOP_I32_F64, int_amdgcn_frexp_exp ->; - -let SchedRW = [WriteDoubleAdd] in { -defm V_FREXP_MANT_F64 : VOP1Inst , "v_frexp_mant_f64", - VOP_F64_F64, int_amdgcn_frexp_mant ->; - -defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", - VOP_F64_F64, AMDGPUfract ->; -} // End SchedRW = [WriteDoubleAdd] - - -defm V_FREXP_EXP_I32_F32 : VOP1Inst , 
"v_frexp_exp_i32_f32", - VOP_I32_F32, int_amdgcn_frexp_exp ->; -defm V_FREXP_MANT_F32 : VOP1Inst , "v_frexp_mant_f32", - VOP_F32_F32, int_amdgcn_frexp_mant ->; -let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { -defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NO_EXT>; -} - -let Uses = [M0, EXEC] in { -// v_movreld_b32 is a special case because the destination output - // register is really a source. It isn't actually read (but may be - // written), and is only to provide the base register to start - // indexing from. Tablegen seems to not let you define an implicit - // virtual register output for the super register being written into, - // so this must have an implicit def of the register added to it. -defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_MOVRELD>; -defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_I32_VI32_NO_EXT>; -defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_NO_EXT>; - -} // End Uses = [M0, EXEC] - -// These instruction only exist on SI and CI -let SubtargetPredicate = isSICI in { - -let SchedRW = [WriteQuarterRate32] in { - -defm V_MOV_FED_B32 : VOP1InstSI , "v_mov_fed_b32", VOP_I32_I32>; -defm V_LOG_CLAMP_F32 : VOP1InstSI , "v_log_clamp_f32", - VOP_F32_F32, int_amdgcn_log_clamp>; -defm V_RCP_CLAMP_F32 : VOP1InstSI , "v_rcp_clamp_f32", VOP_F32_F32>; -defm V_RCP_LEGACY_F32 : VOP1InstSI , "v_rcp_legacy_f32", - VOP_F32_F32, AMDGPUrcp_legacy>; -defm V_RSQ_CLAMP_F32 : VOP1InstSI , "v_rsq_clamp_f32", - VOP_F32_F32, AMDGPUrsq_clamp ->; -defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32", - VOP_F32_F32, AMDGPUrsq_legacy ->; - -} // End SchedRW = [WriteQuarterRate32] - -let SchedRW = [WriteDouble] in { - -defm V_RCP_CLAMP_F64 : VOP1InstSI , "v_rcp_clamp_f64", VOP_F64_F64>; -defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64", - VOP_F64_F64, AMDGPUrsq_clamp ->; - -} // End SchedRW = [WriteDouble] - -} // End SubtargetPredicate = isSICI - -//===----------------------------------------------------------------------===// // VINTRP Instructions //===----------------------------------------------------------------------===// @@ -317,198 +87,6 @@ defm V_INTERP_MOV_F32 : VINTRP_m < } // End Uses = [M0, EXEC] //===----------------------------------------------------------------------===// -// VOP2 Instructions -//===----------------------------------------------------------------------===// - -defm V_CNDMASK_B32 : VOP2eInst , "v_cndmask_b32", - VOP2e_I32_I32_I32_I1 ->; - -let isCommutable = 1 in { -defm V_ADD_F32 : VOP2Inst , "v_add_f32", - VOP_F32_F32_F32, fadd ->; - -defm V_SUB_F32 : VOP2Inst , "v_sub_f32", VOP_F32_F32_F32, fsub>; -defm V_SUBREV_F32 : VOP2Inst , "v_subrev_f32", - VOP_F32_F32_F32, null_frag, "v_sub_f32" ->; -} // End isCommutable = 1 - -let isCommutable = 1 in { - -defm V_MUL_LEGACY_F32 : VOP2Inst , "v_mul_legacy_f32", - VOP_F32_F32_F32, AMDGPUfmul_legacy ->; - -defm V_MUL_F32 : VOP2Inst , "v_mul_f32", - VOP_F32_F32_F32, fmul ->; - -defm V_MUL_I32_I24 : VOP2Inst , "v_mul_i32_i24", - VOP_I32_I32_I32, AMDGPUmul_i24 ->; - -defm V_MUL_HI_I32_I24 : VOP2Inst , "v_mul_hi_i32_i24", - VOP_I32_I32_I32, AMDGPUmulhi_i24 ->; - -defm V_MUL_U32_U24 : VOP2Inst , "v_mul_u32_u24", - VOP_I32_I32_I32, AMDGPUmul_u24 ->; - -defm V_MUL_HI_U32_U24 : VOP2Inst , "v_mul_hi_u32_u24", - VOP_I32_I32_I32, AMDGPUmulhi_u24 ->; - -defm V_MIN_F32 : VOP2Inst , "v_min_f32", VOP_F32_F32_F32, - fminnum>; -defm V_MAX_F32 : VOP2Inst , "v_max_f32", VOP_F32_F32_F32, - fmaxnum>; -defm V_MIN_I32 : VOP2Inst , "v_min_i32", VOP_I32_I32_I32>; -defm V_MAX_I32 : VOP2Inst , "v_max_i32", 
VOP_I32_I32_I32>; -defm V_MIN_U32 : VOP2Inst , "v_min_u32", VOP_I32_I32_I32>; -defm V_MAX_U32 : VOP2Inst , "v_max_u32", VOP_I32_I32_I32>; - -defm V_LSHRREV_B32 : VOP2Inst < - vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag, - "v_lshr_b32" ->; - -defm V_ASHRREV_I32 : VOP2Inst < - vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag, - "v_ashr_i32" ->; - -defm V_LSHLREV_B32 : VOP2Inst < - vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag, - "v_lshl_b32" ->; - -defm V_AND_B32 : VOP2Inst , "v_and_b32", VOP_I32_I32_I32>; -defm V_OR_B32 : VOP2Inst , "v_or_b32", VOP_I32_I32_I32>; -defm V_XOR_B32 : VOP2Inst , "v_xor_b32", VOP_I32_I32_I32>; - -let Constraints = "$vdst = $src2", DisableEncoding="$src2", - isConvertibleToThreeAddress = 1 in { -defm V_MAC_F32 : VOP2Inst , "v_mac_f32", VOP_MAC>; -} -} // End isCommutable = 1 - -defm V_MADMK_F32 : VOP2MADK , "v_madmk_f32", VOP_MADMK>; - -let isCommutable = 1 in { -defm V_MADAK_F32 : VOP2MADK , "v_madak_f32", VOP_MADAK>; -} // End isCommutable = 1 - -let isCommutable = 1 in { -// No patterns so that the scalar instructions are always selected. -// The scalar versions will be replaced with vector when needed later. - -// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI, -// but the VI instructions behave the same as the SI versions. -defm V_ADD_I32 : VOP2bInst , "v_add_i32", - VOP2b_I32_I1_I32_I32 ->; -defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP2b_I32_I1_I32_I32>; - -defm V_SUBREV_I32 : VOP2bInst , "v_subrev_i32", - VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32" ->; - -defm V_ADDC_U32 : VOP2bInst , "v_addc_u32", - VOP2b_I32_I1_I32_I32_I1 ->; -defm V_SUBB_U32 : VOP2bInst , "v_subb_u32", - VOP2b_I32_I1_I32_I32_I1 ->; -defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32", - VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32" ->; - -} // End isCommutable = 1 - -// These are special and do not read the exec mask. 
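Illustration: v_readlane_b32 s0, v1, s2 returns the value of v1 in the lane selected by s2 whether or not that lane is active, which is why EXEC is deliberately absent from Uses below and isConvergent keeps the lane-indexed reads and writes from being moved across control flow.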
-let isConvergent = 1, Uses = [] in { - -defm V_READLANE_B32 : VOP2SI_3VI_m < - vop3 <0x001, 0x289>, - "v_readlane_b32", - (outs SReg_32:$vdst), - (ins VGPR_32:$src0, SCSrc_b32:$src1), - "v_readlane_b32 $vdst, $src0, $src1", - [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))] ->; - -defm V_WRITELANE_B32 : VOP2SI_3VI_m < - vop3 <0x002, 0x28a>, - "v_writelane_b32", - (outs VGPR_32:$vdst), - (ins SReg_32:$src0, SCSrc_b32:$src1), - "v_writelane_b32 $vdst, $src0, $src1" ->; - -} // End isConvergent = 1 - -// These instructions only exist on SI and CI -let SubtargetPredicate = isSICI in { - -let isCommutable = 1 in { -defm V_MAC_LEGACY_F32 : VOP2InstSI , "v_mac_legacy_f32", - VOP_F32_F32_F32 ->; -} // End isCommutable = 1 - -defm V_MIN_LEGACY_F32 : VOP2InstSI , "v_min_legacy_f32", - VOP_F32_F32_F32, AMDGPUfmin_legacy ->; -defm V_MAX_LEGACY_F32 : VOP2InstSI , "v_max_legacy_f32", - VOP_F32_F32_F32, AMDGPUfmax_legacy ->; - -let isCommutable = 1 in { -defm V_LSHR_B32 : VOP2InstSI , "v_lshr_b32", VOP_I32_I32_I32>; -defm V_ASHR_I32 : VOP2InstSI , "v_ashr_i32", VOP_I32_I32_I32>; -defm V_LSHL_B32 : VOP2InstSI , "v_lshl_b32", VOP_I32_I32_I32>; -} // End isCommutable = 1 -} // End let SubtargetPredicate = SICI - -defm V_BFM_B32 : VOP2_VI3_Inst , "v_bfm_b32", - VOP_I32_I32_I32 ->; -defm V_BCNT_U32_B32 : VOP2_VI3_Inst , "v_bcnt_u32_b32", - VOP_I32_I32_I32 ->; -defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_lo_u32_b32", - VOP_I32_I32_I32, int_amdgcn_mbcnt_lo ->; -defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_hi_u32_b32", - VOP_I32_I32_I32, int_amdgcn_mbcnt_hi ->; -defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32", - VOP_F32_F32_I32, AMDGPUldexp ->; - -defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst , "v_cvt_pkaccum_u8_f32", - VOP_I32_F32_I32>; // TODO: set "Uses = dst" - -defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst , "v_cvt_pknorm_i16_f32", - VOP_I32_F32_F32 ->; -defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst , "v_cvt_pknorm_u16_f32", - VOP_I32_F32_F32 ->; -defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst , "v_cvt_pkrtz_f16_f32", - VOP_I32_F32_F32, int_SI_packf16 ->; -defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst , "v_cvt_pk_u16_u32", - VOP_I32_I32_I32 ->; -defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst , "v_cvt_pk_i16_i32", - VOP_I32_I32_I32 ->; - -//===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VIInstrFormats.td b/llvm/lib/Target/AMDGPU/VIInstrFormats.td index fd9f6e7..1fd1c1e 100644 --- a/llvm/lib/Target/AMDGPU/VIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/VIInstrFormats.td @@ -11,163 +11,6 @@ // //===----------------------------------------------------------------------===// -class VOP3a_vi op> : Enc64 { - bits<2> src0_modifiers; - bits<9> src0; - bits<2> src1_modifiers; - bits<9> src1; - bits<2> src2_modifiers; - bits<9> src2; - bits<1> clamp; - bits<2> omod; - - let Inst{8} = src0_modifiers{1}; - let Inst{9} = src1_modifiers{1}; - let Inst{10} = src2_modifiers{1}; - let Inst{15} = clamp; - let Inst{25-16} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = src0; - let Inst{49-41} = src1; - let Inst{58-50} = src2; - let Inst{60-59} = omod; - let Inst{61} = src0_modifiers{0}; - let Inst{62} = src1_modifiers{0}; - let Inst{63} = src2_modifiers{0}; -} - -class VOP3e_vi op> : VOP3a_vi { - bits<8> vdst; - - let Inst{7-0} = vdst; -} - -class VOP3be_vi op> : Enc64 { - bits<8> vdst; - bits<2> src0_modifiers; - bits<9> src0; - bits<2> 
src1_modifiers; - bits<9> src1; - bits<2> src2_modifiers; - bits<9> src2; - bits<7> sdst; - bits<2> omod; - bits<1> clamp; - - let Inst{7-0} = vdst; - let Inst{14-8} = sdst; - let Inst{15} = clamp; - let Inst{25-16} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = src0; - let Inst{49-41} = src1; - let Inst{58-50} = src2; - let Inst{60-59} = omod; - let Inst{61} = src0_modifiers{0}; - let Inst{62} = src1_modifiers{0}; - let Inst{63} = src2_modifiers{0}; -} - -class VOP_DPP pattern, bit HasMods = 0> : - VOPAnyCommon { - let DPP = 1; - let Size = 8; - - let AsmMatchConverter = !if(!eq(HasMods,1), "cvtDPP", ""); - let AsmVariantName = AMDGPUAsmVariants.DPP; -} - -class VOP_DPPe : Enc64 { - bits<2> src0_modifiers; - bits<8> src0; - bits<2> src1_modifiers; - bits<9> dpp_ctrl; - bits<1> bound_ctrl; - bits<4> bank_mask; - bits<4> row_mask; - - let Inst{39-32} = src0; - let Inst{48-40} = dpp_ctrl; - let Inst{51} = bound_ctrl; - let Inst{52} = src0_modifiers{0}; // src0_neg - let Inst{53} = src0_modifiers{1}; // src0_abs - let Inst{54} = src1_modifiers{0}; // src1_neg - let Inst{55} = src1_modifiers{1}; // src1_abs - let Inst{59-56} = bank_mask; - let Inst{63-60} = row_mask; -} - -class VOP1_DPPe op> : VOP_DPPe { - bits<8> vdst; - - let Inst{8-0} = 0xfa; // dpp - let Inst{16-9} = op; - let Inst{24-17} = vdst; - let Inst{31-25} = 0x3f; //encoding -} - -class VOP2_DPPe op> : VOP_DPPe { - bits<8> vdst; - bits<8> src1; - - let Inst{8-0} = 0xfa; //dpp - let Inst{16-9} = src1; - let Inst{24-17} = vdst; - let Inst{30-25} = op; - let Inst{31} = 0x0; //encoding -} - -class VOP_SDWA pattern, bit HasMods = 0> : - VOPAnyCommon { - let SDWA = 1; - let Size = 8; - let AsmVariantName = AMDGPUAsmVariants.SDWA; -} - -class VOP_SDWAe : Enc64 { - bits<8> src0; - bits<3> src0_sel; - bits<2> src0_fmodifiers; // {abs,neg} - bits<1> src0_imodifiers; // sext - bits<3> src1_sel; - bits<2> src1_fmodifiers; - bits<1> src1_imodifiers; - bits<3> dst_sel; - bits<2> dst_unused; - bits<1> clamp; - - let Inst{39-32} = src0; - let Inst{42-40} = dst_sel; - let Inst{44-43} = dst_unused; - let Inst{45} = clamp; - let Inst{50-48} = src0_sel; - let Inst{53-52} = src0_fmodifiers; - let Inst{51} = src0_imodifiers; - let Inst{58-56} = src1_sel; - let Inst{61-60} = src1_fmodifiers; - let Inst{59} = src1_imodifiers; -} - -class VOP1_SDWAe op> : VOP_SDWAe { - bits<8> vdst; - - let Inst{8-0} = 0xf9; // sdwa - let Inst{16-9} = op; - let Inst{24-17} = vdst; - let Inst{31-25} = 0x3f; // encoding -} - -class VOP2_SDWAe op> : VOP_SDWAe { - bits<8> vdst; - bits<8> src1; - - let Inst{8-0} = 0xf9; // sdwa - let Inst{16-9} = src1; - let Inst{24-17} = vdst; - let Inst{30-25} = op; - let Inst{31} = 0x0; // encoding -} - class EXPe_vi : EXPe { let Inst{31-26} = 0x31; //encoding } diff --git a/llvm/lib/Target/AMDGPU/VIInstructions.td b/llvm/lib/Target/AMDGPU/VIInstructions.td index 7ae7368..ead90ec 100644 --- a/llvm/lib/Target/AMDGPU/VIInstructions.td +++ b/llvm/lib/Target/AMDGPU/VIInstructions.td @@ -8,109 +8,3 @@ //===----------------------------------------------------------------------===// // Instruction definitions for VI and newer. 
//===----------------------------------------------------------------------===// - -let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in { - -let DisableSIDecoder = 1 in { - -//===----------------------------------------------------------------------===// -// VOP1 Instructions -//===----------------------------------------------------------------------===// - -defm V_CVT_F16_U16 : VOP1Inst , "v_cvt_f16_u16", VOP_F16_I16>; -defm V_CVT_F16_I16 : VOP1Inst , "v_cvt_f16_i16", VOP_F16_I16>; -defm V_CVT_U16_F16 : VOP1Inst , "v_cvt_u16_f16", VOP_I16_F16>; -defm V_CVT_I16_F16 : VOP1Inst , "v_cvt_i16_f16", VOP_I16_F16>; -defm V_RCP_F16 : VOP1Inst , "v_rcp_f16", VOP_F16_F16>; -defm V_SQRT_F16 : VOP1Inst , "v_sqrt_f16", VOP_F16_F16>; -defm V_RSQ_F16 : VOP1Inst , "v_rsq_f16", VOP_F16_F16>; -defm V_LOG_F16 : VOP1Inst , "v_log_f16", VOP_F16_F16>; -defm V_EXP_F16 : VOP1Inst , "v_exp_f16", VOP_F16_F16>; -defm V_FREXP_MANT_F16 : VOP1Inst , "v_frexp_mant_f16", - VOP_F16_F16 ->; -defm V_FREXP_EXP_I16_F16 : VOP1Inst , "v_frexp_exp_i16_f16", - VOP_I16_F16 ->; -defm V_FLOOR_F16 : VOP1Inst , "v_floor_f16", VOP_F16_F16>; -defm V_CEIL_F16 : VOP1Inst , "v_ceil_f16", VOP_F16_F16>; -defm V_TRUNC_F16 : VOP1Inst , "v_trunc_f16", VOP_F16_F16>; -defm V_RNDNE_F16 : VOP1Inst , "v_rndne_f16", VOP_F16_F16>; -defm V_FRACT_F16 : VOP1Inst , "v_fract_f16", VOP_F16_F16>; -defm V_SIN_F16 : VOP1Inst , "v_sin_f16", VOP_F16_F16>; -defm V_COS_F16 : VOP1Inst , "v_cos_f16", VOP_F16_F16>; - -//===----------------------------------------------------------------------===// -// VOP2 Instructions -//===----------------------------------------------------------------------===// - -let isCommutable = 1 in { - -defm V_ADD_F16 : VOP2Inst , "v_add_f16", VOP_F16_F16_F16>; -defm V_SUB_F16 : VOP2Inst , "v_sub_f16", VOP_F16_F16_F16>; -defm V_SUBREV_F16 : VOP2Inst , "v_subrev_f16", VOP_F16_F16_F16, - null_frag, "v_sub_f16" ->; -defm V_MUL_F16 : VOP2Inst , "v_mul_f16", VOP_F16_F16_F16>; -defm V_MAC_F16 : VOP2Inst , "v_mac_f16", VOP_F16_F16_F16>; -} // End isCommutable = 1 -defm V_MADMK_F16 : VOP2MADK , "v_madmk_f16", VOP_MADMK>; -let isCommutable = 1 in { -defm V_MADAK_F16 : VOP2MADK , "v_madak_f16", VOP_MADAK>; -defm V_ADD_U16 : VOP2Inst , "v_add_u16", VOP_I16_I16_I16>; -defm V_SUB_U16 : VOP2Inst , "v_sub_u16" , VOP_I16_I16_I16>; -defm V_SUBREV_U16 : VOP2Inst , "v_subrev_u16", VOP_I16_I16_I16>; -defm V_MUL_LO_U16 : VOP2Inst , "v_mul_lo_u16", VOP_I16_I16_I16>; -} // End isCommutable = 1 -defm V_LSHLREV_B16 : VOP2Inst , "v_lshlrev_b16", VOP_I16_I16_I16>; -defm V_LSHRREV_B16 : VOP2Inst , "v_lshrrev_b16", VOP_I16_I16_I16>; -defm V_ASHRREV_B16 : VOP2Inst , "v_ashrrev_b16", VOP_I16_I16_I16>; -let isCommutable = 1 in { -defm V_MAX_F16 : VOP2Inst , "v_max_f16", VOP_F16_F16_F16>; -defm V_MIN_F16 : VOP2Inst , "v_min_f16", VOP_F16_F16_F16>; -defm V_MAX_U16 : VOP2Inst , "v_max_u16", VOP_I16_I16_I16>; -defm V_MAX_I16 : VOP2Inst , "v_max_i16", VOP_I16_I16_I16>; -defm V_MIN_U16 : VOP2Inst , "v_min_u16", VOP_I16_I16_I16>; -defm V_MIN_I16 : VOP2Inst , "v_min_i16", VOP_I16_I16_I16>; -} // End isCommutable = 1 -defm V_LDEXP_F16 : VOP2Inst , "v_ldexp_f16", VOP_F16_F16_I16>; - -} // let DisableSIDecoder = 1 - -// Aliases to simplify matching of floating-point instructions that -// are VOP2 on SI and VOP3 on VI. 
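Example of what the alias buys, illustrative: VI has no e32 encoding for these opcodes, so plain source such as v_ldexp_f32 v0, v1, v2 must match the _e64_vi instruction; the alias below accepts the modifier-free syntax and fills the VOP3 modifier, clamp, and omod operands with zeros.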
- -class SI2_VI3Alias : InstAlias < - name#" $dst, $src0, $src1", - (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0) ->, PredicateControl { - let UseInstAsmMatchConverter = 0; - let AsmVariantName = AMDGPUAsmVariants.VOP3; -} - -def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; -def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; -def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; -def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; -def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; - -} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI - -let Predicates = [isVI] in { - -//===----------------------------------------------------------------------===// -// DPP Patterns -//===----------------------------------------------------------------------===// - -def : Pat < - (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, - imm:$bound_ctrl), - (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), - (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) ->; - -//===----------------------------------------------------------------------===// -// Misc Patterns -//===----------------------------------------------------------------------===// - -} // End Predicates = [isVI] diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td new file mode 100644 index 0000000..f108922 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -0,0 +1,539 @@ +//===-- VOP1Instructions.td - Vector Instruction Defintions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// VOP1 Classes +//===----------------------------------------------------------------------===// + +class VOP1e op, VOPProfile P> : Enc32 { + bits<8> vdst; + bits<9> src0; + + let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, 0); + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; //encoding +} + +class VOP1_Pseudo pattern=[]> : + InstSI , + VOP , + SIMCInstr , + MnemonicAlias { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = P.Asm32; + + let Size = 4; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let SubtargetPredicate = isGCN; + + let VOP1 = 1; + let VALU = 1; + let Uses = [EXEC]; + + let AsmVariantName = AMDGPUAsmVariants.Default; + + VOPProfile Pfl = P; +} + +class VOP1_Real : + InstSI , + SIMCInstr { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + // copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let AsmVariantName = ps.AsmVariantName; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; +} + +class getVOP1Pat64 : LetDummies { + list ret = !if(P.HasModifiers, + [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, + i32:$src0_modifiers, i1:$clamp, i32:$omod))))], + [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]); +} + +multiclass VOP1Inst { + def _e32 : VOP1_Pseudo ; + def _e64 : VOP3_Pseudo .ret>; +} + +//===----------------------------------------------------------------------===// +// VOP1 Instructions +//===----------------------------------------------------------------------===// + +let VOPAsmPrefer32Bit = 1 in { +defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>; +} + +let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { +defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; +} // End isMoveImm = 1 + +// FIXME: Specify SchedRW for READFIRSTLANE_B32 +// TODO: Make profile for this, there is VOP3 encoding also +def V_READFIRSTLANE_B32 : + InstSI <(outs SReg_32:$vdst), + (ins VGPR_32:$src0), + "v_readfirstlane_b32 $vdst, $src0", + [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>, + Enc32 { + + let isCodeGenOnly = 0; + let UseNamedOperandTable = 1; + + let Size = 4; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let SubtargetPredicate = isGCN; + + let VOP1 = 1; + let VALU = 1; + let Uses = [EXEC]; + let isConvergent = 1; + + bits<8> vdst; + bits<9> src0; + + let Inst{8-0} = src0; + let Inst{16-9} = 0x2; + let Inst{24-17} = vdst; + let Inst{31-25} = 0x3f; //encoding +} + +let SchedRW = [WriteQuarterRate32] in { +defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; +defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>; +defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>; +defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>; +defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; +defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>; +defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>; +defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>; +defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; +defm 
V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; +defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>; +defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; +defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; +defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>; +defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>; +defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>; +defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>; +defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; +defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>; +} // End SchedRW = [WriteQuarterRate32] + +defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; +defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; +defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>; +defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; +defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; +defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; + +let SchedRW = [WriteQuarterRate32] in { +defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>; +defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; +defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>; +defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; +} // End SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { +defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; +defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; +} // End SchedRW = [WriteDouble]; + +defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>; + +let SchedRW = [WriteDouble] in { +defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>; +} // End SchedRW = [WriteDouble] + +let SchedRW = [WriteQuarterRate32] in { +defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>; +defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; +} // End SchedRW = [WriteQuarterRate32] + +defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; +defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>; +defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>; +defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; +defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>; +defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; + +let SchedRW = [WriteDoubleAdd] in { +defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>; +defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>; +} // End SchedRW = [WriteDoubleAdd] + +defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>; +defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>; + +let VOPAsmPrefer32Bit = 1 in { +defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT>; +} + +// Restrict src0 to be VGPR +def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { + let Src0RC32 = VRegSrc_32; + let Src0RC64 = VRegSrc_32; + + let HasExt = 0; +} + +// Special case because there are no true output operands. Hack vdst +// to be a src operand. 
The custom inserter must add a tied implicit +// def and use of the super register since there seems to be no way to +// add an implicit def of a virtual register in tablegen. +def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { + let Src0RC32 = VOPDstOperand; + let Src0RC64 = VOPDstOperand; + + let Outs = (outs); + let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0); + let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); + + let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_modifiers, VCSrc_b32:$src0, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel); + + let Asm32 = getAsm32<1, 1>.ret; + let Asm64 = getAsm64<1, 1, 0>.ret; + let AsmDPP = getAsmDPP<1, 1, 0>.ret; + let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; + + let HasExt = 0; + let HasDst = 0; + let EmitDst = 1; // force vdst emission +} + +let Uses = [M0, EXEC] in { +// v_movreld_b32 is a special case because the destination output + // register is really a source. It isn't actually read (but may be + // written), and is only to provide the base register to start + // indexing from. Tablegen seems to not let you define an implicit + // virtual register output for the super register being written into, + // so this must have an implicit def of the register added to it. +defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>; +defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>; +defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT>; +} // End Uses = [M0, EXEC] + +// These instruction only exist on SI and CI +let SubtargetPredicate = isSICI in { + +let SchedRW = [WriteQuarterRate32] in { +defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; +defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; +defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>; +defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>; +defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; +defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>; +} // End SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { +defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>; +defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>; +} // End SchedRW = [WriteDouble] + +} // End SubtargetPredicate = isSICI + + +let SubtargetPredicate = isCIVI in { + +let SchedRW = [WriteDoubleAdd] in { +defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>; +defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>; +defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>; +defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>; +} // End SchedRW = [WriteDoubleAdd] + +let SchedRW = [WriteQuarterRate32] in { +defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>; +defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; +} // End SchedRW = [WriteQuarterRate32] + +} // End SubtargetPredicate = isCIVI + + +let SubtargetPredicate = isVI in { + +defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16>; +defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16>; +defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16>; +defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16>; +defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16>; +defm 
V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16>; +defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16>; +defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16>; +defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16>; +defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16>; +defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16>; +defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16>; +defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16>; +defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16>; +defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16>; +defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16>; +defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16>; +defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16>; + +} + +//===----------------------------------------------------------------------===// +// Target +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SI +//===----------------------------------------------------------------------===// + +multiclass VOP1_Real_si op> { + let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { + def _e32_si : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, + VOP1e(NAME#"_e32").Pfl>; + def _e64_si : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3e_si <{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; + } +} + +defm V_NOP : VOP1_Real_si <0x0>; +defm V_MOV_B32 : VOP1_Real_si <0x1>; +defm V_CVT_I32_F64 : VOP1_Real_si <0x3>; +defm V_CVT_F64_I32 : VOP1_Real_si <0x4>; +defm V_CVT_F32_I32 : VOP1_Real_si <0x5>; +defm V_CVT_F32_U32 : VOP1_Real_si <0x6>; +defm V_CVT_U32_F32 : VOP1_Real_si <0x7>; +defm V_CVT_I32_F32 : VOP1_Real_si <0x8>; +defm V_MOV_FED_B32 : VOP1_Real_si <0x9>; +defm V_CVT_F16_F32 : VOP1_Real_si <0xa>; +defm V_CVT_F32_F16 : VOP1_Real_si <0xb>; +defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>; +defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>; +defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>; +defm V_CVT_F32_F64 : VOP1_Real_si <0xf>; +defm V_CVT_F64_F32 : VOP1_Real_si <0x10>; +defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>; +defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>; +defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>; +defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>; +defm V_CVT_U32_F64 : VOP1_Real_si <0x15>; +defm V_CVT_F64_U32 : VOP1_Real_si <0x16>; +defm V_FRACT_F32 : VOP1_Real_si <0x20>; +defm V_TRUNC_F32 : VOP1_Real_si <0x21>; +defm V_CEIL_F32 : VOP1_Real_si <0x22>; +defm V_RNDNE_F32 : VOP1_Real_si <0x23>; +defm V_FLOOR_F32 : VOP1_Real_si <0x24>; +defm V_EXP_F32 : VOP1_Real_si <0x25>; +defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>; +defm V_LOG_F32 : VOP1_Real_si <0x27>; +defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>; +defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>; +defm V_RCP_F32 : VOP1_Real_si <0x2a>; +defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>; +defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>; +defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>; +defm V_RSQ_F32 : VOP1_Real_si <0x2e>; +defm V_RCP_F64 : VOP1_Real_si <0x2f>; +defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>; +defm V_RSQ_F64 : VOP1_Real_si <0x31>; +defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>; +defm V_SQRT_F32 : VOP1_Real_si <0x33>; +defm V_SQRT_F64 : VOP1_Real_si <0x34>; +defm V_SIN_F32 : VOP1_Real_si <0x35>; +defm V_COS_F32 : VOP1_Real_si <0x36>; +defm V_NOT_B32 : VOP1_Real_si <0x37>; +defm V_BFREV_B32 : VOP1_Real_si <0x38>; +defm V_FFBH_U32 : VOP1_Real_si <0x39>; +defm V_FFBL_B32 : VOP1_Real_si <0x3a>; +defm V_FFBH_I32 : VOP1_Real_si <0x3b>; +defm 
V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>; +defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>; +defm V_FRACT_F64 : VOP1_Real_si <0x3e>; +defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>; +defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>; +defm V_CLREXCP : VOP1_Real_si <0x41>; +defm V_MOVRELD_B32 : VOP1_Real_si <0x42>; +defm V_MOVRELS_B32 : VOP1_Real_si <0x43>; +defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>; + +//===----------------------------------------------------------------------===// +// CI +//===----------------------------------------------------------------------===// + +multiclass VOP1_Real_ci op> { + let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in { + def _e32_ci : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, + VOP1e(NAME#"_e32").Pfl>; + def _e64_ci : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3e_si <{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; + } +} + +defm V_TRUNC_F64 : VOP1_Real_ci <0x17>; +defm V_CEIL_F64 : VOP1_Real_ci <0x18>; +defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>; +defm V_RNDNE_F64 : VOP1_Real_ci <0x19>; +defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>; +defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>; + +//===----------------------------------------------------------------------===// +// VI +//===----------------------------------------------------------------------===// + +class VOP1_SDWA op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> : + VOP_SDWA { + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + let AsmMatchConverter = "cvtSdwaVOP1"; + + bits<8> vdst; + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + +class VOP1_DPP op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> : + VOP_DPP { + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + + bits<8> vdst; + let Inst{8-0} = 0xfa; // dpp + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; //encoding +} + +multiclass VOP1_Real_vi op> { + let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { + def _e32_vi : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.VI>, + VOP1e(NAME#"_e32").Pfl>; + def _e64_vi : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3e_vi (NAME#"_e64").Pfl>; + } + + // for now left sdwa/dpp only for asm/dasm + // TODO: add corresponding pseudo + def _sdwa : VOP1_SDWA(NAME#"_e32")>; + def _dpp : VOP1_DPP(NAME#"_e32")>; +} + +defm V_NOP : VOP1_Real_vi <0x0>; +defm V_MOV_B32 : VOP1_Real_vi <0x1>; +defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>; +defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>; +defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>; +defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>; +defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>; +defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>; +defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>; +defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>; +defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>; +defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>; +defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>; +defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>; +defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>; +defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>; +defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>; +defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>; +defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>; +defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>; +defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>; +defm V_FRACT_F32 : VOP1_Real_vi <0x1b>; +defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>; +defm V_CEIL_F32 : VOP1_Real_vi <0x1d>; +defm V_RNDNE_F32 
: VOP1_Real_vi <0x1e>; +defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>; +defm V_EXP_F32 : VOP1_Real_vi <0x20>; +defm V_LOG_F32 : VOP1_Real_vi <0x21>; +defm V_RCP_F32 : VOP1_Real_vi <0x22>; +defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>; +defm V_RSQ_F32 : VOP1_Real_vi <0x24>; +defm V_RCP_F64 : VOP1_Real_vi <0x25>; +defm V_RSQ_F64 : VOP1_Real_vi <0x26>; +defm V_SQRT_F32 : VOP1_Real_vi <0x27>; +defm V_SQRT_F64 : VOP1_Real_vi <0x28>; +defm V_SIN_F32 : VOP1_Real_vi <0x29>; +defm V_COS_F32 : VOP1_Real_vi <0x2a>; +defm V_NOT_B32 : VOP1_Real_vi <0x2b>; +defm V_BFREV_B32 : VOP1_Real_vi <0x2c>; +defm V_FFBH_U32 : VOP1_Real_vi <0x2d>; +defm V_FFBL_B32 : VOP1_Real_vi <0x2e>; +defm V_FFBH_I32 : VOP1_Real_vi <0x2f>; +defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>; +defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>; +defm V_FRACT_F64 : VOP1_Real_vi <0x32>; +defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>; +defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>; +defm V_CLREXCP : VOP1_Real_vi <0x35>; +defm V_MOVRELD_B32 : VOP1_Real_vi <0x36>; +defm V_MOVRELS_B32 : VOP1_Real_vi <0x37>; +defm V_MOVRELSD_B32 : VOP1_Real_vi <0x38>; +defm V_TRUNC_F64 : VOP1_Real_vi <0x17>; +defm V_CEIL_F64 : VOP1_Real_vi <0x18>; +defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>; +defm V_RNDNE_F64 : VOP1_Real_vi <0x19>; +defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>; +defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>; +defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>; +defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>; +defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>; +defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>; +defm V_RCP_F16 : VOP1_Real_vi <0x3d>; +defm V_SQRT_F16 : VOP1_Real_vi <0x3e>; +defm V_RSQ_F16 : VOP1_Real_vi <0x3f>; +defm V_LOG_F16 : VOP1_Real_vi <0x40>; +defm V_EXP_F16 : VOP1_Real_vi <0x41>; +defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>; +defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>; +defm V_FLOOR_F16 : VOP1_Real_vi <0x44>; +defm V_CEIL_F16 : VOP1_Real_vi <0x45>; +defm V_TRUNC_F16 : VOP1_Real_vi <0x46>; +defm V_RNDNE_F16 : VOP1_Real_vi <0x47>; +defm V_FRACT_F16 : VOP1_Real_vi <0x48>; +defm V_SIN_F16 : VOP1_Real_vi <0x49>; +defm V_COS_F16 : VOP1_Real_vi <0x4a>; + +let Predicates = [isVI] in { + +def : Pat < + (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, + imm:$bound_ctrl), + (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), + (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) +>; + +} // End Predicates = [isVI] diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td new file mode 100644 index 0000000..b0f5d8f --- /dev/null +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -0,0 +1,608 @@ +//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// VOP2 Classes +//===----------------------------------------------------------------------===// + +class VOP2e op, VOPProfile P> : Enc32 { + bits<8> vdst; + bits<9> src0; + bits<8> src1; + + let Inst{8-0} = !if(P.HasSrc0, src0, 0); + let Inst{16-9} = !if(P.HasSrc1, src1, 0); + let Inst{24-17} = !if(P.EmitDst, vdst, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; //encoding +} + +class VOP2_MADKe op, VOPProfile P> : Enc64 { + bits<8> vdst; + bits<9> src0; + bits<8> src1; + bits<32> imm; + + let Inst{8-0} = !if(P.HasSrc0, src0, 0); + let Inst{16-9} = !if(P.HasSrc1, src1, 0); + let Inst{24-17} = !if(P.EmitDst, vdst, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding + let Inst{63-32} = imm; +} + +class VOP2_Pseudo pattern=[], string suffix = "_e32"> : + InstSI , + VOP , + SIMCInstr , + MnemonicAlias { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = P.Asm32; + + let Size = 4; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let SubtargetPredicate = isGCN; + + let VOP2 = 1; + let VALU = 1; + let Uses = [EXEC]; + + let AsmVariantName = AMDGPUAsmVariants.Default; + + VOPProfile Pfl = P; +} + +class VOP2_Real : + InstSI , + SIMCInstr { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + // copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let AsmVariantName = ps.AsmVariantName; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; +} + +class getVOP2Pat64 : LetDummies { + list ret = !if(P.HasModifiers, + [(set P.DstVT:$vdst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], + [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]); +} + +multiclass VOP2Inst { + + def _e32 : VOP2_Pseudo , + Commutable_REV; + + def _e64 : VOP3_Pseudo .ret>, + Commutable_REV; +} + +multiclass VOP2bInst { + + let SchedRW = [Write32Bit, WriteSALU] in { + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { + def _e32 : VOP2_Pseudo , + Commutable_REV; + } + def _e64 : VOP3_Pseudo .ret>, + Commutable_REV; + } +} + +multiclass VOP2eInst { + + let SchedRW = [Write32Bit] in { + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { + def _e32 : VOP2_Pseudo , + Commutable_REV; + } + def _e64 : VOP3_Pseudo .ret>, + Commutable_REV; + } +} + +def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> { + field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm); + field string Asm32 = "$vdst, $src0, $src1, $imm"; + field bit HasExt = 0; +} + +def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> { + field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1); + field string Asm32 = "$vdst, $src0, $imm, $src1"; + field bit HasExt = 0; +} + +def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); + let Ins64 = getIns64, 3, + HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret; + let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0, + FP32InputMods:$src1_modifiers, Src1RC32:$src1, + VGPR_32:$src2, // stub argument + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins 
FP32InputMods:$src0_modifiers, Src0RC32:$src0, + FP32InputMods:$src1_modifiers, Src1RC32:$src1, + VGPR_32:$src2, // stub argument + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); + let Asm32 = getAsm32<1, 2, f32>.ret; + let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret; + let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret; + let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret; + let HasSrc2 = 0; + let HasSrc2Mods = 0; +} + +// Write out to vcc or arbitrary SGPR. +def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { + let Asm32 = "$vdst, vcc, $src0, $src1"; + let Asm64 = "$vdst, $sdst, $src0, $src1"; + let Outs32 = (outs DstRC:$vdst); + let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); +} + +// Write out to vcc or arbitrary SGPR and read in from vcc or +// arbitrary SGPR. +def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { + // We use VCSrc_b32 to exclude literal constants: even though the + // encoding normally allows them, the implicit VCC use means that + // using one would always violate the constant bus restriction. + // SGPRs are still allowed because it should technically be + // possible to use VCC again as src0. + let Src0RC32 = VCSrc_b32; + let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; + let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; + let Outs32 = (outs DstRC:$vdst); + let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + + // Suppress src2 implied by type since the 32-bit encoding uses an + // implicit VCC use. + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); +} + +// Read in from vcc or arbitrary SGPR. +def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { + let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above. + let Asm32 = "$vdst, $src0, $src1, vcc"; + let Asm64 = "$vdst, $src0, $src1, $src2"; + let Outs32 = (outs DstRC:$vdst); + let Outs64 = (outs DstRC:$vdst); + + // Suppress src2 implied by type since the 32-bit encoding uses an + // implicit VCC use.
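+ // E.g. v_cndmask_b32, defined with this profile below, is written + // "v_cndmask_b32_e32 v0, v1, v2, vcc" in its e32 form: the trailing vcc + // comes from the Asm32 string above rather than from an explicit src2 + // operand (illustrative register choices).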
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); +} + +def VOP_READLANE : VOPProfile<[i32, i32, i32]> { + let Outs32 = (outs SReg_32:$vdst); + let Outs64 = Outs32; + let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1); + let Ins64 = Ins32; + let Asm32 = " $vdst, $src0, $src1"; + let Asm64 = Asm32; +} + +def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { + let Outs32 = (outs VGPR_32:$vdst); + let Outs64 = Outs32; + let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1); + let Ins64 = Ins32; + let Asm32 = " $vdst, $src0, $src1"; + let Asm64 = Asm32; +} + +//===----------------------------------------------------------------------===// +// VOP2 Instructions +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGCN in { + +defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>; +def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK>; + +let isCommutable = 1 in { +defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>; +defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>; +defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">; +defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; +defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>; +defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>; +defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>; +defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>; +defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; +defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>; +defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>; +defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>; +defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>; +defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>; +defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>; +defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">; +defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">; +defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">; +defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>; +defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>; +defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>; + +let Constraints = "$vdst = $src2", DisableEncoding="$src2", + isConvertibleToThreeAddress = 1 in { +defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC>; +} + +def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK>; + +// No patterns so that the scalar instructions are always selected. +// The scalar versions will be replaced with the vector versions when needed later. + +// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI, +// but the VI instructions behave the same as the SI versions. +defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>; +defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>; +defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">; +defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>; +defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>; +defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">; +} // End isCommutable = 1 + +// These are special and do not read the exec mask.
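+// E.g. "v_readlane_b32 s0, v0, s1" copies the value of v0 in the lane +// selected by s1 into s0 (illustrative operands); because the result depends +// on other lanes, the pseudos below are marked convergent.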
+let isConvergent = 1, Uses = [] in { +def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, + [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">; + +def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">; +} // End isConvergent = 1 + +defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; +defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>; +defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; +defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; +defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>; +defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst" +defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>; +defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>; +defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>; +defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>; +defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>; + +} // End SubtargetPredicate = isGCN + + +// These instructions only exist on SI and CI +let SubtargetPredicate = isSICI in { + +defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>; +defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; + +let isCommutable = 1 in { +defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>; +defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>; +defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>; +defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>; +} // End isCommutable = 1 + +} // End SubtargetPredicate = isSICI + +let SubtargetPredicate = isVI in { + +def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK>; +defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>; +defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>; +defm V_ASHRREV_B16 : VOP2Inst <"v_ashrrev_b16", VOP_I16_I16_I16>; +defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16>; + +let isCommutable = 1 in { +defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16>; +defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16>; +defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; +defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16>; +defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_F16_F16_F16>; +def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK>; +defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>; +defm V_SUB_U16 : VOP2Inst <"v_sub_u16", VOP_I16_I16_I16>; +defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16>; +defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>; +defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16>; +defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16>; +defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>; +defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>; +defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>; +defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>; +} // End isCommutable = 1 + +} // End SubtargetPredicate = isVI + +//===----------------------------------------------------------------------===// +// SI +//===----------------------------------------------------------------------===// + +let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { +
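+// For example, "defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>" below should +// expand to real definitions V_ADD_F32_e32_si and V_ADD_F32_e64_si, pairing +// the corresponding pseudos with their SI encodings.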
+multiclass VOP2_Real_si op> { + def _si : + VOP2_Real(NAME), SIEncodingFamily.SI>, + VOP2e(NAME).Pfl>; +} + +multiclass VOP2_Real_MADK_si op> { + def _si : VOP2_Real(NAME), SIEncodingFamily.SI>, + VOP2_MADKe(NAME).Pfl>; +} + +multiclass VOP2_Real_e32_si op> { + def _e32_si : + VOP2_Real(NAME#"_e32"), SIEncodingFamily.SI>, + VOP2e(NAME#"_e32").Pfl>; +} + +multiclass VOP2_Real_e32e64_si op> : VOP2_Real_e32_si { + def _e64_si : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3e_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; +} + +multiclass VOP2be_Real_e32e64_si op> : VOP2_Real_e32_si { + def _e64_si : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3be_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; +} + +} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" + +defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>; +defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>; +defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>; +defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>; +defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>; +defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>; +defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>; +defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>; +defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>; +defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>; +defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>; +defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>; +defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>; +defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>; +defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>; +defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>; +defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>; +defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>; +defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>; +defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>; +defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>; +defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>; +defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>; +defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>; +defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>; +defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>; +defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>; +defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>; +defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>; +defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>; +defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>; + +defm V_READLANE_B32 : VOP2_Real_si <0x01>; +defm V_WRITELANE_B32 : VOP2_Real_si <0x02>; + +defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>; +defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>; +defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>; +defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>; +defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>; +defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>; + +defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>; +defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>; +defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>; +defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>; +defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>; +defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>; +defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>; +defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>; +defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>; +defm V_CVT_PK_U16_U32 : VOP2_Real_e32e64_si <0x30>; +defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>; + + +//===----------------------------------------------------------------------===// +// VI +//===----------------------------------------------------------------------===// + +class VOP2_SDWA op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> : + VOP_SDWA { + let Defs = ps.Defs; + let Uses = ps.Uses; 
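+ // Note: Defs, Uses, SchedRW, and hasSideEffects are copied from the e32 + // pseudo so the SDWA form stays in sync with it.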
+ let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + let AsmMatchConverter = "cvtSdwaVOP2"; + + bits<8> vdst; + bits<8> src1; + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding +} + +class VOP2_DPP op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> : + VOP_DPP { + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + + bits<8> vdst; + bits<8> src1; + let Inst{8-0} = 0xfa; //dpp + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; //encoding +} + +let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { + +multiclass VOP32_Real_vi op> { + def _vi : + VOP2_Real(NAME), SIEncodingFamily.VI>, + VOP3e_vi(NAME).Pfl>; +} + +multiclass VOP2_Real_MADK_vi op> { + def _vi : VOP2_Real(NAME), SIEncodingFamily.VI>, + VOP2_MADKe(NAME).Pfl>; +} + +multiclass VOP2_Real_e32_vi op> { + def _e32_vi : + VOP2_Real(NAME#"_e32"), SIEncodingFamily.VI>, + VOP2e(NAME#"_e32").Pfl>; +} + +multiclass VOP2_Real_e64_vi op> { + def _e64_vi : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3e_vi (NAME#"_e64").Pfl>; +} + +multiclass VOP2be_Real_e32e64_vi op> : VOP2_Real_e32_vi { + def _e64_vi : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; +} + +multiclass Base_VOP2_Real_e32e64_vi op> : + VOP2_Real_e32_vi, + VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>; + +} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" + +multiclass VOP2_Real_e32e64_vi op> : + Base_VOP2_Real_e32e64_vi { + // for now left sdwa/dpp only for asm/dasm + // TODO: add corresponding pseudo + def _sdwa : VOP2_SDWA(NAME#"_e32")>; + def _dpp : VOP2_DPP(NAME#"_e32")>; +} + +defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>; +defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; +defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; +defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; +defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>; +defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>; +defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>; +defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>; +defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>; +defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>; +defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>; +defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>; +defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>; +defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>; +defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>; +defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>; +defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>; +defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>; +defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>; +defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>; +defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>; +defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>; +defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; +defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; +defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; +defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>; +defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>; +defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>; +defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>; +defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>; +defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>; + +defm V_READLANE_B32 : VOP32_Real_vi <0x289>; +defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>; + +defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>; +defm V_BCNT_U32_B32 : 
VOP2_Real_e64_vi <0x28b>; +defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>; +defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>; +defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>; +defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>; +defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>; +defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>; +defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>; +defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>; +defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>; + +defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; +defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; +defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; +defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; +defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; +defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; +defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; +defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; +defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; +defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; +defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; +defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; +defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; +defm V_ASHRREV_B16 : VOP2_Real_e32e64_vi <0x2c>; +defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; +defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; +defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; +defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; +defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; +defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; +defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; + +let SubtargetPredicate = isVI in { + +// Aliases to simplify matching of floating-point instructions that +// are VOP2 on SI and VOP3 on VI. +class SI2_VI3Alias : InstAlias < + name#" $dst, $src0, $src1", + (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0) +>, PredicateControl { + let UseInstAsmMatchConverter = 0; + let AsmVariantName = AMDGPUAsmVariants.VOP3; +} + +def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; + +} // End SubtargetPredicate = isVI diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index b8b76ff..0f06375 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -39,7 +39,7 @@ class getVOP3Pat { } class VOP3Inst : - VOP3_PseudoNew.ret, getVOP3Pat.ret), VOP3Only>; @@ -118,7 +118,7 @@ let Uses = [VCC, EXEC] in { // if (vcc) // result *= 2^32 // -def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, +def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, getVOP3VCC.ret> { let SchedRW = [WriteFloatFMA]; } @@ -127,7 +127,7 @@ def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, // if (vcc) // result *= 2^64 // -def V_DIV_FMAS_F64 : VOP3_PseudoNew <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, +def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, getVOP3VCC.ret> { let SchedRW = [WriteDouble]; } @@ -165,12 +165,12 @@ def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile, AMDGPUldexp, 1>; } // End SchedRW = [WriteDoubleAdd] -def V_DIV_SCALE_F32 : VOP3_PseudoNew <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { +def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 
1> { let SchedRW = [WriteFloatFMA, WriteSALU]; } // Double precision division pre-scale. -def V_DIV_SCALE_F64 : VOP3_PseudoNew <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { +def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { let SchedRW = [WriteDouble, WriteSALU]; } @@ -234,13 +234,13 @@ let isCommutable = 1 in { let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { multiclass VOP3_Real_si op> { - def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, - VOP3e_siNew (NAME).Pfl>; + def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, + VOP3e_si (NAME).Pfl>; } multiclass VOP3be_Real_si op> { - def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, - VOP3be_siNew (NAME).Pfl>; + def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, + VOP3be_si (NAME).Pfl>; } } // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" @@ -303,8 +303,8 @@ defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>; //===----------------------------------------------------------------------===// multiclass VOP3_Real_ci op> { - def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, - VOP3e_siNew (NAME).Pfl> { + def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, + VOP3e_si (NAME).Pfl> { let AssemblerPredicates = [isCIOnly]; let DecoderNamespace = "CI"; } @@ -323,13 +323,13 @@ defm V_MAD_I64_I32 : VOP3_Real_ci <0x177>; let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { multiclass VOP3_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3e_viNew (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3e_vi (NAME).Pfl>; } multiclass VOP3be_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3be_viNew (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3be_vi (NAME).Pfl>; } } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 8c7738b..30e76aa 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -80,7 +80,7 @@ class VOPC_Real : } // This class is used only with VOPC instructions. 
Use $sdst for out operand -class VOPCInstAlias : +class VOPCInstAlias : InstAlias , PredicateControl { field bit isCompare; @@ -128,7 +128,7 @@ multiclass VOPC_Pseudos sched, ValueType vt> : VOPC_Profile { let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$sdst, $src0_modifiers, $src1"; - let InsSDWA = (ins Src0Mod:$src0_fmodifiers, Src0RC64:$src0, - Int32InputMods:$src1_imodifiers, Src1RC64:$src1, + let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, + Int32InputMods:$src1_modifiers, Src1RC64:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); - let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel"; + let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; + let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; } @@ -422,7 +423,7 @@ multiclass VOPC_Class_Pseudos { let SchedRW = p.Schedule; let isConvergent = DefExec; } - def _e64 : VOP3_PseudoNew.ret> { + def _e64 : VOP3_Pseudo.ret> { let Defs = !if(DefExec, [EXEC], []); let SchedRW = p.Schedule; } @@ -533,15 +534,15 @@ multiclass VOPC_Real_si op> { VOPCe; def _e64_si : - VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3a_siNew (NAME#"_e64").Pfl> { + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3a_si (NAME#"_e64").Pfl> { // Encoding used for VOPC instructions encoded as VOP3 // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst bits<8> sdst; let Inst{7-0} = sdst; } } - def : VOPCInstAlias (NAME#"_e64"), + def : VOPCInstAlias (NAME#"_e64"), !cast(NAME#"_e32_si")> { let AssemblerPredicate = isSICI; } @@ -764,9 +765,15 @@ defm V_CMPX_CLASS_F64 : VOPC_Real_si <0xb8>; // VI //===----------------------------------------------------------------------===// -class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAeNew<P> { - bits<8> src1; +class VOPC_SDWA <bits<8> op, VOPC_Pseudo ps, VOPProfile P = ps.Pfl> : + VOP_SDWA { + let Defs = ps.Defs; + let hasSideEffects = ps.hasSideEffects; + let AsmMatchConverter = "cvtSdwaVOPC"; + let isCompare = ps.isCompare; + let isCommutable = ps.isCommutable; + bits<8> src1; let Inst{8-0} = 0xf9; // sdwa let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); let Inst{24-17} = op; @@ -777,21 +784,6 @@ class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAeNew<P> { let Inst{44-43} = SDWA_UNUSED_PRESERVE; } -class VOPC_SDWA op, VOPC_Pseudo ps, VOPProfile p = ps.Pfl> : - VOP_SDWA , - VOPC_SDWAe { - let Defs = ps.Defs; - let hasSideEffects = ps.hasSideEffects; - let AsmMatchConverter = "cvtSdwaVOPC"; - let SubtargetPredicate = isVI; - let AssemblerPredicate = !if(p.HasExt, isVI, DisableInst); - let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "SDWA"; - let isCompare = ps.isCompare; - let isCommutable = ps.isCommutable; -} - multiclass VOPC_Real_vi op> { let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { def _e32_vi : @@ -799,8 +791,8 @@ multiclass VOPC_Real_vi op> { VOPCe; def _e64_vi : - VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, - VOP3a_viNew (NAME#"_e64").Pfl> { + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3a_vi (NAME#"_e64").Pfl> { // Encoding used for VOPC instructions encoded as VOP3 // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst bits<8> sdst; @@ -812,7 +804,7 @@ multiclass VOPC_Real_vi op> { // TODO: add corresponding pseudo def _sdwa : VOPC_SDWA(NAME#"_e32")>; - def : VOPCInstAlias (NAME#"_e64"), + def : VOPCInstAlias (NAME#"_e64"), !cast(NAME#"_e32_vi")> { let AssemblerPredicate = isVI; } diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index f647bff..d538c78 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -7,7 +7,70 @@ // //===----------------------------------------------------------------------===// -class VOP3_PseudoNew pattern, bit VOP3Only = 0> : +// dummies for outer let +class LetDummies { + bit isCommutable; + bit isConvertibleToThreeAddress; + bit isMoveImm; + bit isReMaterializable; + bit isAsCheapAsAMove; + bit VOPAsmPrefer32Bit; + Predicate SubtargetPredicate; + string Constraints; + string DisableEncoding; + list SchedRW; + list Uses; + list Defs; +} + +class VOP { + string OpName = opName; +} + +class VOPAnyCommon pattern> : + InstSI { + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + let VALU = 1; +} + +class VOP3Common pattern = [], bit HasMods = 0, + bit VOP3Only = 0> : + VOPAnyCommon { + + // Using complex patterns gives VOP3 patterns a very high complexity rating, + // but standalone patterns are almost always preferred, so we need to adjust the + // priority lower. The goal is to use a high number to reduce complexity to + // zero (or less than zero). + let AddedComplexity = -1000; + + let VOP3 = 1; + let VALU = 1; + let Uses = [EXEC]; + + let AsmMatchConverter = + !if(!eq(VOP3Only,1), + "cvtVOP3", + !if(!eq(HasMods,1), "cvtVOP3_2_mod", "")); + + let AsmVariantName = AMDGPUAsmVariants.VOP3; + + let isCodeGenOnly = 0; + + int Size = 8; + + // Because SGPRs may be allowed if there are multiple operands, we + // need a post-isel hook to insert copies in order to avoid + // violating constant bus requirements.
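+ // (Concretely: a single VALU instruction may read at most one SGPR or + // literal-constant operand, so any extra SGPR operands have to be copied + // into VGPRs after selection.)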
+ let hasPostISelHook = 1; +} + + +class VOP3_Pseudo pattern=[], bit VOP3Only = 0> : InstSI , VOP , SIMCInstr, @@ -50,7 +113,7 @@ class VOP3_PseudoNew pattern, bit VOP3On VOPProfile Pfl = P; } -class VOP3_Real : +class VOP3_Real : InstSI , SIMCInstr { @@ -66,7 +129,7 @@ class VOP3_Real : let TSFlags = ps.TSFlags; } -class VOP3aNew : Enc64 { +class VOP3a : Enc64 { bits<2> src0_modifiers; bits<9> src0; bits<2> src1_modifiers; @@ -81,7 +144,7 @@ class VOP3aNew : Enc64 { let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = src0; + let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); let Inst{60-59} = !if(P.HasOMod, omod, 0); @@ -90,27 +153,27 @@ class VOP3aNew : Enc64 { let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); } -class VOP3a_siNew <bits<9> op, VOPProfile P> : VOP3aNew<P> { +class VOP3a_si <bits<9> op, VOPProfile P> : VOP3a<P> { let Inst{25-17} = op; - let Inst{11} = !if(P.HasClamp, clamp, 0); + let Inst{11} = !if(P.HasClamp, clamp{0}, 0); } -class VOP3a_viNew <bits<10> op, VOPProfile P> : VOP3aNew<P> { +class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> { let Inst{25-16} = op; - let Inst{15} = !if(P.HasClamp, clamp, 0); + let Inst{15} = !if(P.HasClamp, clamp{0}, 0); } -class VOP3e_siNew op, VOPProfile P> : VOP3a_siNew { +class VOP3e_si op, VOPProfile P> : VOP3a_si { bits<8> vdst; - let Inst{7-0} = vdst; + let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); } -class VOP3e_viNew op, VOPProfile P> : VOP3a_viNew { +class VOP3e_vi op, VOPProfile P> : VOP3a_vi { bits<8> vdst; - let Inst{7-0} = vdst; + let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); } -class VOP3beNew : Enc64 { +class VOP3be : Enc64 { bits<8> vdst; bits<2> src0_modifiers; bits<9> src0; @@ -133,24 +196,22 @@ class VOP3beNew : Enc64 { let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); } -class VOP3be_siNew <bits<9> op, VOPProfile P> : VOP3beNew<P> { +class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> { let Inst{25-17} = op; } -class VOP3be_viNew <bits<10> op, VOPProfile P> : VOP3beNew<P> { +class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> { bits<1> clamp; let Inst{25-16} = op; - let Inst{15} = !if(P.HasClamp, clamp, 0); + let Inst{15} = !if(P.HasClamp, clamp{0}, 0); } -class VOP_SDWAeNew : Enc64 { +class VOP_SDWAe : Enc64 { bits<8> src0; bits<3> src0_sel; - bits<2> src0_fmodifiers; // {abs,neg} - bits<1> src0_imodifiers; // sext + bits<2> src0_modifiers; // float: {abs,neg}, int {sext} bits<3> src1_sel; - bits<2> src1_fmodifiers; - bits<1> src1_imodifiers; + bits<2> src1_modifiers; bits<3> dst_sel; bits<2> dst_unused; bits<1> clamp; @@ -159,16 +220,77 @@ class VOP_SDWAeNew : Enc64 { bits<2> SDWA_UNUSED_PRESERVE = 2; let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); - let Inst{42-40} = !if(P.HasDst, dst_sel{2-0}, SDWA_DWORD{2-0}); - let Inst{44-43} = !if(P.HasDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0}); + let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA_DWORD{2-0}); + let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0}); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA_DWORD{2-0}); - let Inst{53-52} = !if(P.HasSrc0Mods, src0_fmodifiers{1-0}, 0); - let Inst{51} = !if(P.HasSrc0IntMods, src0_imodifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); + let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA_DWORD{2-0}); - let Inst{61-60} = !if(P.HasSrc1Mods, src1_fmodifiers{1-0}, 0); - let Inst{59} = !if(P.HasSrc1IntMods, src1_imodifiers{0}, 0); + let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); + let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); +} + +class VOP_SDWA : + InstSI , + VOP_SDWAe<P> { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + let VALU = 1; + let SDWA = 1; + let Size = 8; + + let SubtargetPredicate = isVI; + let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst); + let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA"; +} + +class VOP_DPPe : Enc64 { + bits<2> src0_modifiers; + bits<8> src0; + bits<2> src1_modifiers; + bits<9> dpp_ctrl; + bits<1> bound_ctrl; + bits<4> bank_mask; + bits<4> row_mask; + + let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{48-40} = dpp_ctrl; + let Inst{51} = bound_ctrl; + let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg + let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs + let Inst{54} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg + let Inst{55} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs + let Inst{59-56} = bank_mask; + let Inst{63-60} = row_mask; +} + +class VOP_DPP : + InstSI , + VOP_DPPe<P> { + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + + let VALU = 1; + let DPP = 1; + let Size = 8; + + let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", ""); + let SubtargetPredicate = isVI; + let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst); + let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "DPP"; } include "VOPCInstructions.td" +include "VOP1Instructions.td" +include "VOP2Instructions.td" include "VOP3Instructions.td" -- 2.7.4