aco: don't optimize s_or_b64(v_cmp_u_f32(a, b), cmp(a, a))
author Rhys Perry <pendingchaos02@gmail.com>
Thu, 30 Mar 2023 14:33:57 +0000 (15:33 +0100)
committer Marge Bot <emma+marge@anholt.net>
Fri, 31 Mar 2023 19:41:54 +0000 (19:41 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22214>

src/amd/compiler/aco_optimizer.cpp
src/amd/compiler/tests/test_optimizer.cpp

index bd4cf9a..272ef84 100644 (file)
@@ -2320,6 +2320,8 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    if ((prop_cmp1 != prop_nan0 || cmp_valu.opsel[1] != nan_valu.opsel[0]) &&
        (prop_cmp1 != prop_nan1 || cmp_valu.opsel[1] != nan_valu.opsel[1]))
       return false;
+   if (prop_cmp0 == prop_cmp1 && cmp_valu.opsel[0] == cmp_valu.opsel[1])
+      return false;
 
    aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
    VALU_instruction* new_instr = create_instruction<VALU_instruction>(
index 82c2efc..9692ff2 100644 (file)
@@ -2037,6 +2037,48 @@ BEGIN_TEST(optimize.apply_sgpr_swap_opsel)
    finish_opt_test();
 END_TEST
 
+BEGIN_TEST(optimize.combine_comparison_ordering)
+   //>> v1: %a, v1: %b, v1: %c = p_startpgm
+   if (!setup_cs("v1 v1 v1", GFX11))
+      return;
+   /* Two cases of s_or(s_or(x != x, y != y), cmp): one must keep the s_or, one may fold it. */
+   Temp a = inputs[0];
+   Temp b = inputs[1];
+   Temp c = inputs[2];
+   /* Case 0: OR of the self-compares a != a and b != b (named *_unordered here). */
+   Temp a_unordered = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), a, a);
+   Temp b_unordered = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), b, b);
+   Temp unordered =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), a_unordered, b_unordered);
+   /* The compare OR'd in below has a as both operands; b is not an operand of it. */
+   Temp a_lt_a = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), a, a);
+   Temp unordered_cmp =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), unordered, a_lt_a);
+   /* Expected: the self-compares become v_cmp_u_f32 %a, %b, but the outer s_or_b64 is kept. */
+   //! s2: %res0_unordered = v_cmp_u_f32 %a, %b
+   //! s2: %res0_cmp = v_cmp_lt_f32 %a, %a
+   //! s2: %res0,  s2: %_:scc = s_or_b64 %res0_unordered, %res0_cmp
+   //! p_unit_test 0, %res0
+   writeout(0, unordered_cmp);
+   /* Case 1: same shape on f16, using c and c_hi (element 1 of c extracted as v2b). */
+   Temp c_hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), c, Operand::c32(1));
+
+   Temp c_unordered = bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), c, c);
+   Temp c_hi_unordered = bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), c_hi, c_hi);
+   unordered =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), c_unordered, c_hi_unordered);
+
+   Temp c_lt_c_hi = bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), c, c_hi);
+   unordered_cmp =
+      bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(bld.lm, scc), unordered, c_lt_c_hi);
+   /* Expected: compare operands are c and c_hi, so all folds into one v_cmp_nge_f16. */
+   //! s2: %res1 = v_cmp_nge_f16 %c, hi(%c)
+   //! p_unit_test 1, %res1
+   writeout(1, unordered_cmp);
+
+   finish_opt_test();
+END_TEST
+
 BEGIN_TEST(optimize.combine_comparison_ordering_opsel)
    //>> v1: %a, v2b: %b = p_startpgm
    if (!setup_cs("v1  v2b", GFX11))