From 08064a55424c54a7ccf506d46840811f7234e040 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 6 Jun 2023 10:55:16 +0100 Subject: [PATCH] aco: mask bits source of s_bfe The s_bfe instructions use 7 bits, not 5 like the NIR opcode requires. No fossil-db changes (navi21). Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9162 Cc: mesa-stable Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 9f7a170..5ff0415 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3720,15 +3720,19 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; if (const_offset && const_bits) { - uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f); + uint32_t extract = ((const_bits->u32 & 0x1f) << 16) | (const_offset->u32 & 0x1f); bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract)); break; } Temp offset = get_alu_src(ctx, instr->src[1]); Temp bits = get_alu_src(ctx, instr->src[2]); + if (ctx->program->gfx_level >= GFX9) { - Temp extract = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), offset, bits); + Operand bits_op = const_bits ? Operand::c32(const_bits->u32 & 0x1f) + : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), + bld.def(s1, scc), bits, Operand::c32(0x1fu)); + Temp extract = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), offset, bits_op); bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract); } else if (instr->op == nir_op_ubfe) { Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset); @@ -3736,9 +3740,12 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask); bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset); } else { - Operand bits_op = const_bits ? Operand::c32(const_bits->u32 << 16) - : bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), - bld.def(s1, scc), bits, Operand::c32(16u)); + Operand bits_op = const_bits + ? Operand::c32((const_bits->u32 & 0x1f) << 16) + : bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), + bld.sop2(aco_opcode::s_and_b32, bld.def(s1), + bld.def(s1, scc), bits, Operand::c32(0x1fu)), + Operand::c32(16u)); Operand offset_op = const_offset ? Operand::c32(const_offset->u32 & 0x1fu) : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), -- 2.7.4