From: Rhys Perry Date: Fri, 17 Jun 2022 16:42:35 +0000 (+0100) Subject: aco/tests: add GFX11 assembly tests X-Git-Tag: upstream/22.3.5~2372 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=826ed52174c5816c45841de43a015990064bdc22;p=platform%2Fupstream%2Fmesa.git aco/tests: add GFX11 assembly tests Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 86f34b7..db0c4e3 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -519,7 +519,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod ("mubuf", [Format.MUBUF], 'MUBUF_instruction', [(0, 4), (1, 3)]), ("mtbuf", [Format.MTBUF], 'MTBUF_instruction', [(0, 4), (1, 3)]), ("mimg", [Format.MIMG], 'MIMG_instruction', itertools.product([0, 1], [3, 4, 5, 6, 7])), - ("exp", [Format.EXP], 'Export_instruction', [(0, 4)]), + ("exp", [Format.EXP], 'Export_instruction', [(0, 4), (0, 5)]), ("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([1], [0, 1])), ("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]), ("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 2)]), diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 038d6dc..44d91f8 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -80,6 +80,8 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, case GFX8: program->family = CHIP_POLARIS10; break; case GFX9: program->family = CHIP_VEGA10; break; case GFX10: program->family = CHIP_NAVI10; break; + case GFX10_3: program->family = CHIP_NAVI21; break; + case GFX11: program->family = CHIP_GFX1100; break; default: program->family = CHIP_UNKNOWN; break; } } else { diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 49f2af7..098733c 100644 --- 
a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -126,7 +126,7 @@ class Format(Enum): elif self == Format.MIMG: return [('unsigned', 'dmask', '0xF'), ('bool', 'da', 'false'), - ('bool', 'unrm', 'true'), + ('bool', 'unrm', 'false'), ('bool', 'disable_wqm', 'false'), ('bool', 'glc', 'false'), ('bool', 'dlc', 'false'), diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp index 1cca50e..365903c 100644 --- a/src/amd/compiler/tests/test_assembler.cpp +++ b/src/amd/compiler/tests/test_assembler.cpp @@ -22,6 +22,9 @@ * */ #include "helpers.h" +#include "sid.h" + +#include <llvm/Config/llvm-config.h> using namespace aco; @@ -373,3 +376,435 @@ BEGIN_TEST(assembler.vopc_sdwa) finish_assembler_test(); } END_TEST + +#if LLVM_VERSION_MAJOR >= 15 +BEGIN_TEST(assembler.gfx11.smem) + if (!setup_cs(NULL, GFX11)) + return; + + Definition dst = bld.def(s1); + dst.setFixed(PhysReg(4)); + + Operand op_s1(bld.tmp(s1)); + op_s1.setFixed(PhysReg(8)); + + Operand op_s2(bld.tmp(s2)); + op_s2.setFixed(PhysReg(16)); + + Operand op_s4(bld.tmp(s4)); + op_s4.setFixed(PhysReg(32)); + + //>> s_dcache_inv ; f4840000 f8000000 + bld.smem(aco_opcode::s_dcache_inv); + + //! s_load_b32 s4, s[16:17], 0x2a ; f4000108 f800002a + bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42)); + + //! s_load_b32 s4, s[16:17], s8 ; f4000108 10000000 + bld.smem(aco_opcode::s_load_dword, dst, op_s2, op_s1); + + //! s_load_b32 s4, s[16:17], s8 offset:0x2a ; f4000108 1000002a + bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1); + + //! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000 + bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1).instr->smem().glc = true; + + //! 
s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000 + bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1).instr->smem().dlc = true; + + finish_assembler_test(); +END_TEST + +BEGIN_TEST(assembler.gfx11.mubuf) + if (!setup_cs(NULL, GFX11)) + return; + + Definition dst = bld.def(v1); + dst.setFixed(PhysReg(256 + 42)); + + Operand op_s4(bld.tmp(s4)); + op_s4.setFixed(PhysReg(32)); + + Operand op_v1(bld.tmp(v1)); + op_v1.setFixed(PhysReg(256 + 10)); + + Operand op_v2(bld.tmp(v2)); + op_v2.setFixed(PhysReg(256 + 20)); + + Operand op_s1(bld.tmp(s1)); + op_s1.setFixed(PhysReg(30)); + + Operand op_m0(bld.tmp(s1)); + op_m0.setFixed(m0); + + /* Addressing */ + //>> buffer_load_b32 v42, off, s[32:35], s30 ; e0500000 1e082a80 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 0, false); + + //! buffer_load_b32 v42, off, s[32:35], 42 ; e0500000 aa082a80 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::c32(42), 0, false); + + //! buffer_load_b32 v42, v10, s[32:35], s30 offen ; e0500000 1e482a0a + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true); + + //! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false).instr->mubuf().idxen = true; + + //! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true).instr->mubuf().idxen = true; + + //! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false); + + /* Various flags */ + //! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().glc = true; + + //! 
buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().dlc = true; + + //! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().slc = true; + + //! buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80 + bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false).instr->mubuf().tfe = true; + + /* LDS */ + //! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080 + bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + + //! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080 + bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + + //! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080 + bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + + //! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080 + bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + + //! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080 + bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + + //! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080 + bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false).instr->mubuf().lds = true; + + /* Stores */ + //! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80 + bld.mubuf(aco_opcode::buffer_store_dword, op_s4, Operand(v1), op_s1, op_v1, 0, false); + + //! 
buffer_store_b64 v[20:21], v10, s[32:35], s30 offen ; e06c0000 1e48140a + bld.mubuf(aco_opcode::buffer_store_dwordx2, op_s4, op_v1, op_s1, op_v2, 0, true); + + finish_assembler_test(); +END_TEST + +BEGIN_TEST(assembler.gfx11.mtbuf) + if (!setup_cs(NULL, GFX11)) + return; + + Definition dst = bld.def(v1); + dst.setFixed(PhysReg(256 + 42)); + + Operand op_s4(bld.tmp(s4)); + op_s4.setFixed(PhysReg(32)); + + Operand op_v1(bld.tmp(v1)); + op_v1.setFixed(PhysReg(256 + 10)); + + Operand op_v2(bld.tmp(v2)); + op_v2.setFixed(PhysReg(256 + 20)); + + Operand op_s1(bld.tmp(s1)); + op_s1.setFixed(PhysReg(30)); + + unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_32_32; + unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_FLOAT; + + /* Addressing */ + //>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0, false); + + //! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt, nfmt, 0, false); + + //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true); + + //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false).instr->mtbuf().idxen = true; + + //! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true).instr->mtbuf().idxen = true; + + //! 
tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84, false); + + /* Various flags */ + //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().glc = true; + + //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().dlc = true; + + //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().slc = true; + + //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80 + bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false).instr->mtbuf().tfe = true; + + /* Stores */ + //! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80 + bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0, false); + + //! 
tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a + bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true); + + finish_assembler_test(); +END_TEST + +BEGIN_TEST(assembler.gfx11.mimg) + if (!setup_cs(NULL, GFX11)) + return; + + Definition dst_v1 = bld.def(v1); + dst_v1.setFixed(PhysReg(256 + 42)); + + Definition dst_v4 = bld.def(v4); + dst_v4.setFixed(PhysReg(256 + 84)); + + Operand op_s4(bld.tmp(s4)); + op_s4.setFixed(PhysReg(32)); + + Operand op_s8(bld.tmp(s8)); + op_s8.setFixed(PhysReg(64)); + + Operand op_v1(bld.tmp(v1)); + op_v1.setFixed(PhysReg(256 + 10)); + + Operand op_v2(bld.tmp(v2)); + op_v2.setFixed(PhysReg(256 + 20)); + + Operand op_v4(bld.tmp(v4)); + op_v4.setFixed(PhysReg(256 + 30)); + + //>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; f06c0f00 2010540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1); + + //! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414 + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2).instr->mimg().dim = ac_image_2d; + + //! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a + bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().dmask = 0x1; + + /* Various flags */ + //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().dlc = true; + + //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().glc = true; + + //! 
image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().slc = true; + + //! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().tfe = true; + + //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; f06c0f00 2050540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().lwe = true; + + //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; f06c8f00 2010540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().r128 = true; + + //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; f06d0f00 2010540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().a16 = true; + + //! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; f06e0f00 2010540a + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1).instr->mimg().d16 = true; + + /* NSA */ + //! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028 + bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1, Operand(bld.tmp(v1), PhysReg(256 + 40))).instr->mimg().dim = ac_image_2d; + + /* Stores */ + //! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a + bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1); + + //! 
image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14 + bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4), op_v1, op_v2).instr->mimg().dim = ac_image_2d; + + finish_assembler_test(); +END_TEST + +BEGIN_TEST(assembler.gfx11.flat) + if (!setup_cs(NULL, GFX11)) + return; + + Definition dst_v1 = bld.def(v1); + dst_v1.setFixed(PhysReg(256 + 42)); + + Operand op_s1(bld.tmp(s1)); + op_s1.setFixed(PhysReg(32)); + + Operand op_s2(bld.tmp(s2)); + op_s2.setFixed(PhysReg(64)); + + Operand op_v1(bld.tmp(v1)); + op_v1.setFixed(PhysReg(256 + 10)); + + Operand op_v2(bld.tmp(v2)); + op_v2.setFixed(PhysReg(256 + 20)); + + /* Addressing */ + //>> flat_load_b32 v42, v[20:21] ; dc500000 2a7c0014 + bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1)); + + //! global_load_b32 v42, v[20:21], off ; dc520000 2a7c0014 + bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1)); + + //! global_load_b32 v42, v10, s[64:65] ; dc520000 2a40000a + bld.global(aco_opcode::global_load_dword, dst_v1, op_v1, op_s2); + + //! scratch_load_b32 v42, v10, off ; dc510000 2afc000a + bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, Operand(s1)); + + //! scratch_load_b32 v42, off, s32 ; dc510000 2a200080 + bld.scratch(aco_opcode::scratch_load_dword, dst_v1, Operand(v1), op_s1); + + //! scratch_load_b32 v42, v10, s32 ; dc510000 2aa0000a + bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, op_s1); + + //! global_load_b32 v42, v[20:21], off offset:-42 ; dc521fd6 2a7c0014 + bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), -42); + + //! global_load_b32 v42, v[20:21], off offset:84 ; dc520054 2a7c0014 + bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), 84); + + /* Various flags */ + //! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014 + bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1)).instr->flat().slc = true; + + //! 
flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014 + bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1)).instr->flat().glc = true; + + //! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014 + bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1)).instr->flat().dlc = true; + + /* Stores */ + //! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14 + bld.flat(aco_opcode::flat_store_dword, op_v2, Operand(s1), op_v1); + + finish_assembler_test(); +END_TEST + +BEGIN_TEST(assembler.gfx11.exp) + if (!setup_cs(NULL, GFX11)) + return; + + Operand op[4]; + for (unsigned i = 0; i < 4; i++) + op[i] = Operand(PhysReg(256 + i), v1); + + Operand op_m0(bld.tmp(s1)); + op_m0.setFixed(m0); + + //>> exp mrt3 v1, v0, v3, v2 ; f800003f 02030001 + bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3); + + //! exp mrt3 v1, off, v0, off ; f8000035 80008001 + bld.exp(aco_opcode::exp, op[1], Operand(v1), op[0], Operand(v1), 0x5, 3); + + //! exp mrt3 v1, v0, v3, v2 done ; f800083f 02030001 + bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3, false, true); + + //>> exp mrt3 v1, v0, v3, v2 row_en ; f800203f 02030001 + bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], op_m0, 0xf, 3).instr->exp().row_en = true; + + finish_assembler_test(); +END_TEST + +BEGIN_TEST(assembler.gfx11.vinterp) + if (!setup_cs(NULL, GFX11)) + return; + + Definition dst = bld.def(v1); + dst.setFixed(PhysReg(256 + 42)); + + Operand op0(bld.tmp(v1)); + op0.setFixed(PhysReg(256 + 10)); + + Operand op1(bld.tmp(v1)); + op1.setFixed(PhysReg(256 + 20)); + + Operand op2(bld.tmp(s1)); + op2.setFixed(PhysReg(30)); + + //>> v_interp_p10_f32 v42, v10, v20, s30 wait_exp:7 ; cd00072a 007a290a + bld.vinterp(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2); + + //! v_interp_p10_f32 v42, v10, v20, s30 wait_exp:6 ; cd00062a 007a290a + bld.vinterp(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6); + + //! 
v_interp_p2_f32 v42, v10, v20, s30 ; cd01002a 007a290a + bld.vinterp(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0); + + //! v_interp_p10_f32 v42, -v10, v20, s30 ; cd00002a 207a290a + bld.vinterp(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp().neg[0] = true; + + //! v_interp_p10_f32 v42, v10, -v20, s30 ; cd00002a 407a290a + bld.vinterp(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp().neg[1] = true; + + //! v_interp_p10_f32 v42, v10, v20, -s30 ; cd00002a 807a290a + bld.vinterp(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp().neg[2] = true; + + //! v_interp_p10_f16_f32 v42, v10, v20, s30 op_sel:[1,0,0,0] ; cd02082a 007a290a + bld.vinterp(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1); + + //! v_interp_p2_f16_f32 v42, v10, v20, s30 op_sel:[0,1,0,0] ; cd03102a 007a290a + bld.vinterp(aco_opcode::v_interp_p2_f16_f32_inreg, dst, op0, op1, op2, 0, 0x2); + + //! v_interp_p10_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,1,0] ; cd04202a 007a290a + bld.vinterp(aco_opcode::v_interp_p10_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x4); + + //! v_interp_p2_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,0,1] ; cd05402a 007a290a + bld.vinterp(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8); + + //! v_interp_p10_f32 v42, v10, v20, s30 clamp ; cd00802a 007a290a + bld.vinterp(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0).instr->vinterp().clamp = true; + + finish_assembler_test(); +END_TEST + +BEGIN_TEST(assembler.gfx11.ldsdir) + if (!setup_cs(NULL, GFX11)) + return; + + Definition dst = bld.def(v1); + dst.setFixed(PhysReg(256 + 42)); + + Operand op(bld.tmp(s1)); + op.setFixed(m0); + + //>> lds_direct_load v42 wait_vdst:15 ; ce1f002a + bld.ldsdir(aco_opcode::lds_direct_load, dst, op).instr->ldsdir().wait_vdst = 15; + + //! 
lds_direct_load v42 wait_vdst:6 ; ce16002a + bld.ldsdir(aco_opcode::lds_direct_load, dst, op).instr->ldsdir().wait_vdst = 6; + + //! lds_direct_load v42 ; ce10002a + bld.ldsdir(aco_opcode::lds_direct_load, dst, op); + + //! lds_param_load v42, attr56.x wait_vdst:8 ; ce08e02a + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0).instr->ldsdir().wait_vdst = 8; + + //! lds_param_load v42, attr56.x ; ce00e02a + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0); + + //! lds_param_load v42, attr34.y ; ce00892a + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 34, 1); + + //! lds_param_load v42, attr12.z ; ce00322a + bld.ldsdir(aco_opcode::lds_param_load, dst, op, 12, 2); + + finish_assembler_test(); +END_TEST +#endif