aco: select v_mad_u32_u16 for 16-bit multiplications on GFX9+
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 2 Nov 2020 13:46:03 +0000 (14:46 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 12 Nov 2020 12:32:26 +0000 (12:32 +0000)
No fossils-db changes.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7425>

src/amd/compiler/aco_instruction_selection.cpp

index ab4aba0..3bae0a8 100644 (file)
@@ -1724,7 +1724,15 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          uint32_t src0_ub = get_alu_src_ub(ctx, instr, 0);
          uint32_t src1_ub = get_alu_src_ub(ctx, instr, 1);
 
-         if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) {
+         if (src0_ub <= 0xffff && src1_ub <= 0xffff &&
+             ctx->options->chip_class >= GFX9) {
+            /* Initialize the accumulator to 0 to allow further combinations
+             * in the optimizer.
+             */
+            Operand op0(get_alu_src(ctx, instr->src[0]));
+            Operand op1(get_alu_src(ctx, instr->src[1]));
+            bld.vop3(aco_opcode::v_mad_u32_u16, Definition(dst), bld.set16bit(op0), bld.set16bit(op1), Operand(0u));
+         } else if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) {
             emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_u32_u24, dst, true);
          } else {
             emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);