From 6e279f5bb663b8edca53c1195edd11e3502677e1 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 7 Nov 2022 15:45:55 +0300 Subject: [PATCH] [AMDGPU][MC][GFX10+] Enable literal operands with permlane16/permlanex16 Differential Revision: https://reviews.llvm.org/D137332 --- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 7 ++----- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll | 6 ++---- llvm/test/MC/AMDGPU/gfx10_asm_vop3.s | 18 ++++++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 18 ++++++++++++++++++ llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt | 12 ++++++++++++ 5 files changed, 52 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index bb2b918..fdbdfe5 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -660,12 +660,9 @@ let SubtargetPredicate = isGFX11Only in defm : IMAD32_Pats; def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3_OPSEL> { - let Src0RC64 = VRegSrc_32; - let Src1RC64 = SCSrc_b32; - let Src2RC64 = SCSrc_b32; let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, - IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1, - IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2, + IntOpSelMods:$src1_modifiers, SSrc_b32:$src1, + IntOpSelMods:$src2_modifiers, SSrc_b32:$src2, VGPR_32:$vdst_in, op_sel0:$op_sel); let HasClamp = 0; let HasExtVOP3DPP = 0; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll index 862dfe7..6b233f9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -27,9 +27,8 @@ define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src ; GCN-LABEL: {{^}}v_permlane16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out @@ -124,9 +123,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %sr ; GCN-LABEL: {{^}}v_permlanex16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s index d369973..b05bab1 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s @@ -12797,6 +12797,9 @@ v_permlane16_b32 v5, v1, 0.5, s3 v_permlane16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xef,0x0d,0x00] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlane16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x9c,0x01] @@ -12830,6 +12833,12 @@ v_permlane16_b32 v5, v1, s2, 0.5 v_permlane16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xdc,0x03] +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlane16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x77,0xd7,0x01,0x05,0x0c,0x00] @@ -12923,6 +12932,9 @@ v_permlanex16_b32 v5, v1, 0.5, s3 v_permlanex16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xef,0x0d,0x00] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlanex16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x9c,0x01] @@ -12956,6 +12968,12 @@ v_permlanex16_b32 v5, v1, s2, 0.5 v_permlanex16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xdc,0x03] +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlanex16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x78,0xd7,0x01,0x05,0x0c,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 693e12f..991ef34 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -5287,6 +5287,15 @@ v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] // GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlanex16_b32 v5, v1, s2, s3 // GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] @@ -5323,6 +5332,15 @@ v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] // GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] // GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt index 66ce6b8..0785ba2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt @@ -16044,6 +16044,9 @@ # GFX10: v_permlane16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03 @@ -16071,6 +16074,9 @@ # GFX10: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00 @@ -16149,6 +16155,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03 @@ -16176,6 +16185,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00 -- 2.7.4