From 7028e9875f88d4d60aeb0e3bdfe7873fef5678fc Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Mon, 2 Nov 2020 14:46:03 +0100
Subject: [PATCH] aco: select v_mad_u32_u16 for 16-bit multiplications on GFX9+
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

No fossils-db changes.

Signed-off-by: Samuel Pitoiset
Reviewed-by: Timur Kristóf
Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index ab4aba0..3bae0a8 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1724,7 +1724,15 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          uint32_t src0_ub = get_alu_src_ub(ctx, instr, 0);
          uint32_t src1_ub = get_alu_src_ub(ctx, instr, 1);
 
-         if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) {
+         if (src0_ub <= 0xffff && src1_ub <= 0xffff &&
+             ctx->options->chip_class >= GFX9) {
+            /* Initialize the accumulator to 0 to allow further combinations
+             * in the optimizer.
+             */
+            Operand op0(get_alu_src(ctx, instr->src[0]));
+            Operand op1(get_alu_src(ctx, instr->src[1]));
+            bld.vop3(aco_opcode::v_mad_u32_u16, Definition(dst), bld.set16bit(op0), bld.set16bit(op1), Operand(0u));
+         } else if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) {
             emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_u32_u24, dst, true);
          } else {
             emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);
-- 
2.7.4