From: Georg Lehmann
Date: Fri, 22 Sep 2023 17:08:18 +0000 (+0200)
Subject: aco/optimizer: copy propagate to output modifier instructions
X-Git-Tag: upstream/23.3.3~1373
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9508cadadb7d3b393f1cd86f2be516371e5ada85;p=platform%2Fupstream%2Fmesa.git

aco/optimizer: copy propagate to output modifier instructions

Foz-DB Navi21:
Totals from 847 (1.11% of 76572) affected shaders:
Instrs: 2331245 -> 2330335 (-0.04%); split: -0.04%, +0.00%
CodeSize: 12451040 -> 12451736 (+0.01%); split: -0.00%, +0.01%
Latency: 26230953 -> 26229153 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 6297802 -> 6296788 (-0.02%); split: -0.02%, +0.00%
VClause: 64527 -> 64528 (+0.00%); split: -0.00%, +0.01%
SClause: 73150 -> 73121 (-0.04%); split: -0.06%, +0.02%
Copies: 180083 -> 179172 (-0.51%); split: -0.53%, +0.02%
PreSGPRs: 62311 -> 62316 (+0.01%)
PreVGPRs: 51720 -> 51710 (-0.02%)

Reviewed-by: Rhys Perry
Part-of:
---

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 3a9ed09..b58fc36 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -298,6 +298,8 @@ struct ssa_info {
 
    void set_omod2(Instruction* mul)
    {
+      if (label & temp_labels)
+         return;
       add_label(label_omod2);
       instr = mul;
    }
@@ -306,6 +308,8 @@ struct ssa_info {
 
    void set_omod4(Instruction* mul)
    {
+      if (label & temp_labels)
+         return;
       add_label(label_omod4);
       instr = mul;
    }
@@ -314,6 +318,8 @@ struct ssa_info {
 
    void set_omod5(Instruction* mul)
    {
+      if (label & temp_labels)
+         return;
       add_label(label_omod5);
       instr = mul;
    }
@@ -322,6 +328,8 @@ struct ssa_info {
 
    void set_clamp(Instruction* med3)
    {
+      if (label & temp_labels)
+         return;
       add_label(label_clamp);
       instr = med3;
    }
@@ -330,6 +338,8 @@ struct ssa_info {
 
    void set_f2f16(Instruction* conv)
    {
+      if (label & temp_labels)
+         return;
       add_label(label_f2f16);
       instr = conv;
    }
@@ -466,6 +476,8 @@ struct ssa_info {
 
    void set_insert(Instruction* insert)
    {
+      if (label & temp_labels)
+         return;
       add_label(label_insert);
       instr = insert;
    }
diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp
index f544cc7..791b9d7 100644
--- a/src/amd/compiler/tests/test_sdwa.cpp
+++ b/src/amd/compiler/tests/test_sdwa.cpp
@@ -296,10 +296,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
       Temp neg_byte0 = fneg(byte0);
       writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_byte0));
 
-      //~gfx8! v1: %neg = v_mul_f32 -1.0, %b
-      //~gfx8! v1: %res1 = v_mul_f32 %a, %neg dst_sel:dword src0_sel:dword src1_sel:ubyte0
-      //~gfx(9|10)! v1: %neg_byte0 = v_mul_f32 -1.0, %b dst_sel:ubyte0 src0_sel:dword src1_sel:dword
-      //~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %neg_byte0
+      //! v1: %neg = v_mul_f32 -1.0, %b
+      //! v1: %res1 = v_mul_f32 %a, %neg dst_sel:dword src0_sel:dword src1_sel:ubyte0
       //! p_unit_test 1, %res1
       Temp neg = fneg(inputs[1]);
       Temp byte0_neg =
@@ -324,10 +322,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers)
       Temp neg_abs_byte0 = fneg(abs_byte0);
       writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_abs_byte0));
 
-      //~gfx8! v1: %neg_abs = v_mul_f32 -1.0, %abs
-      //~gfx8! v1: %res5 = v_mul_f32 %a, %neg_abs dst_sel:dword src0_sel:dword src1_sel:ubyte0
-      //~gfx(9|10)! v1: %neg_abs_byte0 = v_mul_f32 -1.0, %abs dst_sel:ubyte0 src0_sel:dword src1_sel:dword
-      //~gfx(9|10)! v1: %res5 = v_mul_f32 %a, %neg_abs_byte0
+      //! v1: %neg_abs = v_mul_f32 -1.0, |%b|
+      //! v1: %res5 = v_mul_f32 %a, %neg_abs dst_sel:dword src0_sel:dword src1_sel:ubyte0
       //! p_unit_test 5, %res5
       Temp neg_abs = fneg(abs);
       Temp byte0_neg_abs =