From 7b4f0e714c2d4f3b8f227e29ef1bb38a9bf404c8 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Thu, 14 Sep 2023 13:25:07 +0200
Subject: [PATCH] aco/gfx11: support vinterp as fma_mix

Totals from 718 (0.94% of 76572) affected shaders:
Instrs: 657897 -> 654219 (-0.56%)
CodeSize: 3471668 -> 3457352 (-0.41%); split: -0.41%, +0.00%
VGPRs: 34200 -> 34164 (-0.11%)
Latency: 11687698 -> 11677030 (-0.09%); split: -0.10%, +0.00%
InvThroughput: 1455371 -> 1451537 (-0.26%); split: -0.26%, +0.00%
VClause: 7598 -> 7600 (+0.03%)
SClause: 18293 -> 18241 (-0.28%); split: -0.44%, +0.15%
Copies: 34641 -> 34644 (+0.01%); split: -0.05%, +0.06%

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp        | 9 ++++++---
 src/amd/compiler/tests/test_optimizer.cpp | 6 ++++++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 5bb52ba..311b8ad 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3993,13 +3993,16 @@ combine_output_conversion(opt_ctx& ctx, aco_ptr& instr)
       return false;
 
    Instruction* conv = def_info.instr;
-   if (!can_use_mad_mix(ctx, instr) || ctx.uses[instr->definitions[0].tempId()] != 1)
+   if (!ctx.uses[conv->definitions[0].tempId()] || ctx.uses[instr->definitions[0].tempId()] != 1)
       return false;
 
-   if (!ctx.uses[conv->definitions[0].tempId()])
+   if (conv->usesModifiers())
       return false;
 
-   if (conv->usesModifiers())
+   if (instr->opcode == aco_opcode::v_interp_p2_f32_inreg)
+      interp_p2_f32_inreg_to_fma_dpp(instr);
+
+   if (!can_use_mad_mix(ctx, instr))
       return false;
 
    if (!instr->isVOP3P())
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 04a710d..f09b94c 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -2225,5 +2225,11 @@ BEGIN_TEST(optimize.vinterp_inreg_output_modifiers)
    tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp);
    writeout(3, tmp);
 
+   //! v2b: %res4 = v_fma_mixlo_f16 %c, %b, %a quad_perm:[2,2,2,2] fi
+   //! p_unit_test 4, %res4
+   tmp = bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, bld.def(v1), inputs[2], inputs[1],
+                           inputs[0]);
+   writeout(4, f2f16(tmp));
+
    finish_opt_test();
 END_TEST
-- 
2.7.4