aco/gfx11: support vinterp as fma_mix
author: Georg Lehmann <dadschoorse@gmail.com>
Thu, 14 Sep 2023 11:25:07 +0000 (13:25 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Thu, 5 Oct 2023 20:02:53 +0000 (20:02 +0000)
Totals from 718 (0.94% of 76572) affected shaders:
Instrs: 657897 -> 654219 (-0.56%)
CodeSize: 3471668 -> 3457352 (-0.41%); split: -0.41%, +0.00%
VGPRs: 34200 -> 34164 (-0.11%)
Latency: 11687698 -> 11677030 (-0.09%); split: -0.10%, +0.00%
InvThroughput: 1455371 -> 1451537 (-0.26%); split: -0.26%, +0.00%
VClause: 7598 -> 7600 (+0.03%)
SClause: 18293 -> 18241 (-0.28%); split: -0.44%, +0.15%
Copies: 34641 -> 34644 (+0.01%); split: -0.05%, +0.06%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25220>

src/amd/compiler/aco_optimizer.cpp
src/amd/compiler/tests/test_optimizer.cpp

index 5bb52baf1770e9a382d9e353f3730ae041926277..311b8ad4d0483b0f926ac31ce78c7b4d392ba03c 100644 (file)
@@ -3993,13 +3993,16 @@ combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       return false;
    Instruction* conv = def_info.instr;
 
-   if (!can_use_mad_mix(ctx, instr) || ctx.uses[instr->definitions[0].tempId()] != 1)
+   if (!ctx.uses[conv->definitions[0].tempId()] || ctx.uses[instr->definitions[0].tempId()] != 1)
       return false;
 
-   if (!ctx.uses[conv->definitions[0].tempId()])
+   if (conv->usesModifiers())
       return false;
 
-   if (conv->usesModifiers())
+   if (instr->opcode == aco_opcode::v_interp_p2_f32_inreg)
+      interp_p2_f32_inreg_to_fma_dpp(instr);
+
+   if (!can_use_mad_mix(ctx, instr))
       return false;
 
    if (!instr->isVOP3P())
index 04a710d6bfa55b280bc906f285910157de9b90d2..f09b94cc43b70a77f0d7fadfaeb9e85a752fdf3f 100644 (file)
@@ -2225,5 +2225,11 @@ BEGIN_TEST(optimize.vinterp_inreg_output_modifiers)
    tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp);
    writeout(3, tmp);
 
+   //! v2b: %res4 = v_fma_mixlo_f16 %c, %b, %a quad_perm:[2,2,2,2] fi
+   //! p_unit_test 4, %res4
+   tmp = bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, bld.def(v1), inputs[2], inputs[1],
+                           inputs[0]);
+   writeout(4, f2f16(tmp));
+
    finish_opt_test();
 END_TEST