From 0f60c18f298bc809c37c9fb917c42c37b5061c13 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 30 Mar 2023 15:33:57 +0100
Subject: [PATCH] aco: don't optimize s_or_b64(v_cmp_u_f32(a, b), cmp(a, a))

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp        |  2 ++
 src/amd/compiler/tests/test_optimizer.cpp | 42 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index bd4cf9a..272ef84 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2320,6 +2320,8 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    if ((prop_cmp1 != prop_nan0 || cmp_valu.opsel[1] != nan_valu.opsel[0]) &&
        (prop_cmp1 != prop_nan1 || cmp_valu.opsel[1] != nan_valu.opsel[1]))
       return false;
+   if (prop_cmp0 == prop_cmp1 && cmp_valu.opsel[0] == cmp_valu.opsel[1])
+      return false;
 
    aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
    VALU_instruction* new_instr = create_instruction<VALU_instruction>(
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 82c2efc..9692ff2 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -2037,6 +2037,48 @@ BEGIN_TEST(optimize.apply_sgpr_swap_opsel)
    finish_opt_test();
 END_TEST
 
+BEGIN_TEST(optimize.combine_comparison_ordering)
+   //>> v1: %a, v1: %b, v1: %c = p_startpgm
+   if (!setup_cs("v1 v1 v1", GFX11))
+      return;
+
+   Temp a = inputs[0];
+   Temp b = inputs[1];
+   Temp c = inputs[2];
+
+   Temp a_unordered = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), a, a);
+   Temp b_unordered = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), b, b);
+   Temp unordered =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), a_unordered, b_unordered);
+
+   Temp a_lt_a = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), a, a);
+   Temp unordered_cmp =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), unordered, a_lt_a);
+
+   //! s2: %res0_unordered = v_cmp_u_f32 %a, %b
+   //! s2: %res0_cmp = v_cmp_lt_f32 %a, %a
+   //! s2: %res0, s2: %_:scc = s_or_b64 %res0_unordered, %res0_cmp
+   //! p_unit_test 0, %res0
+   writeout(0, unordered_cmp);
+
+   Temp c_hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), c, Operand::c32(1));
+
+   Temp c_unordered = bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), c, c);
+   Temp c_hi_unordered = bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), c_hi, c_hi);
+   unordered =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), c_unordered, c_hi_unordered);
+
+   Temp c_lt_c_hi = bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), c, c_hi);
+   unordered_cmp =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), unordered, c_lt_c_hi);
+
+   //! s2: %res1 = v_cmp_nge_f16 %c, hi(%c)
+   //! p_unit_test 1, %res1
+   writeout(1, unordered_cmp);
+
+   finish_opt_test();
+END_TEST
+
 BEGIN_TEST(optimize.combine_comparison_ordering_opsel)
    //>> v1: %a, v2b: %b = p_startpgm
    if (!setup_cs("v1 v2b", GFX11))
-- 
2.7.4