From edd9f701638e28c4419658c1daed25ea0c6e8841 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 18 Nov 2019 17:23:40 +0300 Subject: [PATCH] [AMDGPU][MC][GFX10] Enabled v_movrel*[sdwa|dpp|dpp8] opcodes See https://bugs.llvm.org/show_bug.cgi?id=43712 Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D70170 --- .../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 40 ++++++++++ llvm/lib/Target/AMDGPU/VOP1Instructions.td | 63 ++++++--------- llvm/test/MC/AMDGPU/gfx10_asm_all.s | 91 +++++++++++++++++++++- llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s | 12 +++ llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s | 12 +++ .../test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt | 36 +++++++++ .../MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt | 12 +++ .../MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt | 12 +++ 8 files changed, 235 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 9dd511f..1f0f9f2 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1320,6 +1320,7 @@ private: bool validateIntClampSupported(const MCInst &Inst); bool validateMIMGAtomicDMask(const MCInst &Inst); bool validateMIMGGatherDMask(const MCInst &Inst); + bool validateMovrels(const MCInst &Inst); bool validateMIMGDataSize(const MCInst &Inst); bool validateMIMGAddrSize(const MCInst &Inst); bool validateMIMGD16(const MCInst &Inst); @@ -3049,6 +3050,41 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; } +static bool IsMovrelsSDWAOpcode(const unsigned Opcode) +{ + switch (Opcode) { + case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: + case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: + case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: + return true; + default: + return false; + } +} + +// movrels* opcodes should only allow VGPRS as src0. +// This is specified in .td description for vop1/vop3, +// but sdwa is handled differently. See isSDWAOperand. +bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) + return true; + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + assert(Src0Idx != -1); + + const MCOperand &Src0 = Inst.getOperand(Src0Idx); + if (!Src0.isReg()) + return false; + + auto Reg = Src0.getReg(); + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + return !isSGPR(mc2PseudoReg(Reg), TRI); +} + bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); @@ -3469,6 +3505,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "invalid image_gather dmask: only one bit must be set"); return false; } + if (!validateMovrels(Inst)) { + Error(IDLoc, "source operand must be a VGPR"); + return false; + } if (!validateFlatOffset(Inst, Operands)) { return false; } diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index a871aba..2d8f488 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -260,14 +260,9 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT>; } // Restrict src0 to be VGPR -def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { +def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC32 = VRegSrc_32; let Src0RC64 = VRegSrc_32; - - let HasExt = 0; - let HasExtDPP = 0; - let HasExtSDWA = 0; - let HasExtSDWA9 = 0; } // Special case because there are no true output operands. Hack vdst @@ -281,26 +276,24 @@ class VOP_MOVREL : VOPProfile<[untyped, i32, untyped, un let Outs = (outs); let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0); let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0); - let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0, - dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, - bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsDPP16 = !con(InsDPP, (ins FI:$fi)); + let Asm32 = getAsm32<1, 1>.ret; + let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret; - let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, - clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, + let OutsSDWA = (outs Src0RC32:$vdst); + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); + let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; - let Asm32 = getAsm32<1, 1>.ret; - let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret; - let AsmDPP = getAsmDPP<1, 1, 0>.ret; + let OutsDPP = (outs Src0RC32:$vdst); + let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi); let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1>.ret; - let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; - let HasExt = 0; - let HasExtDPP = 0; - let HasExtSDWA = 0; - let HasExtSDWA9 = 0; + let OutsDPP8 = (outs Src0RC32:$vdst); + let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi); + let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret; let HasDst = 0; let EmitDst = 1; // force vdst emission @@ -310,14 +303,14 @@ def VOP_MOVRELD : VOP_MOVREL; def VOP_MOVRELSD : VOP_MOVREL; let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in { -// v_movreld_b32 is a special case because the destination output + // v_movreld_b32 is a special case because the destination output // register is really a source. It isn't actually read (but may be // written), and is only to provide the base register to start // indexing from. Tablegen seems to not let you define an implicit // virtual register output for the super register being written into, // so this must have an implicit def of the register added to it. defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>; -defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>; +defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>; defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>; } // End Uses = [M0, EXEC] @@ -528,16 +521,10 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -multiclass VOP1_Real_gfx10_no_dpp op> : - VOP1_Real_e32_gfx10, VOP1_Real_e64_gfx10, - VOP1_Real_sdwa_gfx10; - -multiclass VOP1_Real_gfx10_no_dpp8 op> : - VOP1_Real_e32_gfx10, VOP1_Real_e64_gfx10, - VOP1_Real_sdwa_gfx10, VOP1_Real_dpp_gfx10; - multiclass VOP1_Real_gfx10 op> : - VOP1_Real_gfx10_no_dpp8, VOP1_Real_dpp8_gfx10; + VOP1_Real_e32_gfx10, VOP1_Real_e64_gfx10, + VOP1_Real_sdwa_gfx10, VOP1_Real_dpp_gfx10, + VOP1_Real_dpp8_gfx10; defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>; defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>; @@ -620,12 +607,6 @@ multiclass VOP1_Real_gfx6_gfx7 op> : multiclass VOP1_Real_gfx6_gfx7_gfx10 op> : VOP1_Real_gfx6_gfx7, VOP1_Real_gfx10; -multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp8 op> : - VOP1_Real_gfx6_gfx7, VOP1_Real_gfx10_no_dpp8; - -multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp op> : - VOP1_Real_gfx6_gfx7, VOP1_Real_gfx10_no_dpp; - defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; @@ -683,9 +664,9 @@ defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>; defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>; defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>; defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>; -defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>; -defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x043>; -defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x044>; +defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>; +defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>; +defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>; //===----------------------------------------------------------------------===// // GFX8, GFX9 (VI). diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s index 79e5514..220f5b2 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s @@ -32163,6 +32163,18 @@ v_movreld_b32_e64 v5, 0.5 v_movreld_b32_e64 v5, -4.0 // GFX10: encoding: [0x05,0x00,0xc2,0xd5,0xf7,0x00,0x00,0x00] +v_movreld_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x84,0x00,0x7e,0x02,0x06,0x06,0x00] + +v_movreld_b32_sdwa v0, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x84,0x00,0x7e,0x02,0x06,0x86,0x00] + +v_movreld_b32_sdwa v0, 64 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x84,0x00,0x7e,0xc0,0x06,0x86,0x00] + +v_movreld_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x84,0x00,0x7e,0x00,0x06,0x0e,0x00] + v_movrels_b32 v5, v1 // GFX10: encoding: [0x01,0x87,0x0a,0x7e] @@ -32181,6 +32193,30 @@ v_movrels_b32_e64 v255, v1 v_movrels_b32_e64 v5, v255 // GFX10: encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] +v_movrels_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x86,0x00,0x7e,0x02,0x06,0x06,0x00] + +v_movrels_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x86,0x00,0x7e,0x00,0x06,0x0e,0x00] + +v_movrels_b32_e32 v5, s1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrels_b32_e32 v5, 1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrels_b32_e64 v5, s1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrels_b32_e64 v5, 1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrels_b32_sdwa v0, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10-ERR: error: source operand must be a VGPR + +v_movrels_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10-ERR: error: source operand must be a VGPR + v_movrelsd_b32 v5, v1 // GFX10: encoding: [0x01,0x89,0x0a,0x7e] @@ -32199,9 +32235,33 @@ v_movrelsd_b32_e64 v255, v1 v_movrelsd_b32_e64 v5, v255 // GFX10: encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] -v_movrelsd_b32 v5, s1 +v_movrelsd_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x88,0x00,0x7e,0x02,0x06,0x06,0x00] + +v_movrelsd_b32_sdwa v0, v0 dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x88,0x00,0x7e,0x00,0x06,0x06,0x00] + +v_movrelsd_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x88,0x00,0x7e,0x00,0x06,0x0e,0x00] + +v_movrelsd_b32_e32 v5, s1 // GFX10-ERR: error: invalid operand for instruction +v_movrelsd_b32_e32 v5, 1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrelsd_b32_e64 v5, s1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrelsd_b32_e64 v5, 1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrelsd_b32_sdwa v0, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10-ERR: error: source operand must be a VGPR + +v_movrelsd_b32_sdwa v0, 1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10-ERR: error: source operand must be a VGPR + v_movrelsd_2_b32 v5, v1 // GFX10: encoding: [0x01,0x91,0x0a,0x7e] @@ -32220,9 +32280,36 @@ v_movrelsd_2_b32_e64 v255, v1 v_movrelsd_2_b32_e64 v5, v255 // GFX10: encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] -v_movrelsd_2_b32 v5, s1 +v_movrelsd_2_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x90,0x00,0x7e,0x02,0x06,0x06,0x00] + +v_movrelsd_2_b32_sdwa v0, v0 dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x90,0x00,0x7e,0x00,0x06,0x06,0x00] + +v_movrelsd_2_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10: encoding: [0xf9,0x90,0x00,0x7e,0x00,0x06,0x0e,0x00] + +v_movrelsd_2_b32_e32 v5, s1 // GFX10-ERR: error: invalid operand for instruction +v_movrelsd_2_b32_e32 v5, 1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrelsd_2_b32_e64 v5, s1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrelsd_2_b32_e64 v5, 1 +// GFX10-ERR: error: invalid operand for instruction + +v_movrelsd_2_b32_sdwa v0, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10-ERR: error: source operand must be a VGPR + +v_movrelsd_2_b32_sdwa v0, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10-ERR: error: source operand must be a VGPR + +v_movrelsd_2_b32_sdwa v0, null dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD +// GFX10-ERR: error: source operand must be a VGPR + v_cvt_f16_u16_e32 v5, v1 // GFX10: encoding: [0x01,0xa1,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s index e2f6733..ce3cef5 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s @@ -680,3 +680,15 @@ v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x1 bank_mask:0x0 v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // GFX10: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x04,0x00] + +v_movreld_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX10: [0xfa,0x84,0x02,0x7e,0x00,0x1b,0x00,0x00] + +v_movrels_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX10: [0xfa,0x86,0x02,0x7e,0x00,0x1b,0x04,0x00] + +v_movrelsd_2_b32_dpp v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX10: [0xfa,0x90,0x00,0x7e,0x02,0x1b,0x00,0x00] + +v_movrelsd_b32_dpp v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX10: [0xfa,0x88,0x00,0x7e,0xff,0x1b,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s index b148356..70d779a 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -577,3 +577,15 @@ v_mac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_mac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX10: v_mac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x3e,0x01,0x77,0x39,0x05] + +v_movreld_b32 v0, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX10: [0xea,0x84,0x00,0x7e,0x01,0x77,0x39,0x05] + +v_movrels_b32 v0, v2 dpp8:[0,0,0,0,0,0,0,0] +// GFX10: [0xe9,0x86,0x00,0x7e,0x02,0x00,0x00,0x00] + +v_movrelsd_2_b32 v0, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX10: [0xe9,0x90,0x00,0x7e,0xff,0x77,0x39,0x05] + +v_movrelsd_b32 v0, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX10: [0xe9,0x88,0x00,0x7e,0x02,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt index cc342f9..c1ec51e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -86292,6 +86292,18 @@ # GFX10: v_movreld_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00 +# GFX10: v_movreld_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x84,0x00,0x7e,0x02,0x06,0x06,0x00] +0xf9,0x84,0x00,0x7e,0x02,0x06,0x06,0x00 + +# GFX10: v_movreld_b32_sdwa v0, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x84,0x00,0x7e,0x02,0x06,0x86,0x00] +0xf9,0x84,0x00,0x7e,0x02,0x06,0x86,0x00 + +# GFX10: v_movreld_b32_sdwa v0, 64 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x84,0x00,0x7e,0xc0,0x06,0x86,0x00] +0xf9,0x84,0x00,0x7e,0xc0,0x06,0x86,0x00 + +# GFX10: v_movreld_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x84,0x00,0x7e,0x00,0x06,0x0e,0x00] +0xf9,0x84,0x00,0x7e,0x00,0x06,0x0e,0x00 + # GFX10: v_movrels_b32_e32 v255, v1 ; encoding: [0x01,0x87,0xfe,0x7f] 0x01,0x87,0xfe,0x7f @@ -86310,6 +86322,12 @@ # GFX10: v_movrels_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00 +# GFX10: v_movrels_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x86,0x00,0x7e,0x02,0x06,0x06,0x00] +0xf9,0x86,0x00,0x7e,0x02,0x06,0x06,0x00 + +# GFX10: v_movrels_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x86,0x00,0x7e,0x00,0x06,0x0e,0x00] +0xf9,0x86,0x00,0x7e,0x00,0x06,0x0e,0x00 + # GFX10: v_movrelsd_2_b32_e32 v255, v1 ; encoding: [0x01,0x91,0xfe,0x7f] 0x01,0x91,0xfe,0x7f @@ -86328,6 +86346,15 @@ # GFX10: v_movrelsd_2_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00 +# GFX10: v_movrelsd_2_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x90,0x00,0x7e,0x02,0x06,0x06,0x00] +0xf9,0x90,0x00,0x7e,0x02,0x06,0x06,0x00 + +# GFX10: v_movrelsd_2_b32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x90,0x00,0x7e,0x00,0x06,0x06,0x00] +0xf9,0x90,0x00,0x7e,0x00,0x06,0x06,0x00 + +# GFX10: v_movrelsd_2_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x90,0x00,0x7e,0x00,0x06,0x0e,0x00] +0xf9,0x90,0x00,0x7e,0x00,0x06,0x0e,0x00 + # GFX10: v_movrelsd_b32_e32 v255, v1 ; encoding: [0x01,0x89,0xfe,0x7f] 0x01,0x89,0xfe,0x7f @@ -86346,6 +86373,15 @@ # GFX10: v_movrelsd_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00 +# GFX10: v_movrelsd_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x88,0x00,0x7e,0x02,0x06,0x06,0x00] +0xf9,0x88,0x00,0x7e,0x02,0x06,0x06,0x00 + +# GFX10: v_movrelsd_b32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x88,0x00,0x7e,0x00,0x06,0x06,0x00] +0xf9,0x88,0x00,0x7e,0x00,0x06,0x06,0x00 + +# GFX10: v_movrelsd_b32_sdwa v0, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x88,0x00,0x7e,0x00,0x06,0x0e,0x00] +0xf9,0x88,0x00,0x7e,0x00,0x06,0x0e,0x00 + # GFX10: v_mqsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04] 0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt index 2e6df6e..6e2c95f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt @@ -656,3 +656,15 @@ # GFX10: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x04,0x00] 0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x04,0x00 + +# GFX10: v_movreld_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x84,0x02,0x7e,0x00,0x1b,0x00,0x00] +0xfa,0x84,0x02,0x7e,0x00,0x1b,0x00,0x00 + +# GFX10: v_movrels_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x86,0x02,0x7e,0x00,0x1b,0x04,0x00] +0xfa,0x86,0x02,0x7e,0x00,0x1b,0x04,0x00 + +# GFX10: v_movrelsd_2_b32_dpp v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x90,0x00,0x7e,0x02,0x1b,0x00,0x00] +0xfa,0x90,0x00,0x7e,0x02,0x1b,0x00,0x00 + +# GFX10: v_movrelsd_b32_dpp v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x88,0x00,0x7e,0xff,0x1b,0x00,0x00] +0xfa,0x88,0x00,0x7e,0xff,0x1b,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt index d21c043..afd0e63 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt @@ -544,3 +544,15 @@ # GFX10: v_mac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x3e,0x01,0x77,0x39,0x05] 0xea,0x04,0x0a,0x3e,0x01,0x77,0x39,0x05 + +# GFX10: v_movreld_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x84,0x00,0x7e,0x01,0x77,0x39,0x05] +0xea,0x84,0x00,0x7e,0x01,0x77,0x39,0x05 + +# GFX10: v_movrels_b32_dpp v0, v2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x86,0x00,0x7e,0x02,0x00,0x00,0x00] +0xe9,0x86,0x00,0x7e,0x02,0x00,0x00,0x00 + +# GFX10: v_movrelsd_2_b32_dpp v0, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x90,0x00,0x7e,0xff,0x77,0x39,0x05] +0xe9,0x90,0x00,0x7e,0xff,0x77,0x39,0x05 + +# GFX10: v_movrelsd_b32_dpp v0, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x88,0x00,0x7e,0x02,0x77,0x39,0x05] +0xe9,0x88,0x00,0x7e,0x02,0x77,0x39,0x05 -- 2.7.4