aco: don't combine precise max(min()) to med3

author Rhys Perry <pendingchaos02@gmail.com>

Wed, 7 Oct 2020 10:09:16 +0000 (11:09 +0100)

committer Marge Bot <eric+marge@anholt.net>

Fri, 13 Nov 2020 12:34:27 +0000 (12:34 +0000)
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 7 Oct 2020 10:09:16 +0000 (11:09 +0100)
committer Marge Bot <eric+marge@anholt.net>
Fri, 13 Nov 2020 12:34:27 +0000 (12:34 +0000)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp

index 506653c..4add86b 100644 (file)
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1976,7 +1976,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
                          Instruction* op1_instr, bool swap, const char *shuffle_str,
                          Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel,
                          bool *op1_clamp, uint8_t *op1_omod,
-                        bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel)
+                        bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel,
+                        bool *precise)
  {
     /* checks */
     if (op1_instr->opcode != op1)
@@ -2017,6 +2018,9 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
     else if (op1_vop3 && op1_vop3->opsel & (1 << swap))
        return false;
  
+   *precise = op1_instr->definitions[0].isPrecise() ||
+              op2_instr->definitions[0].isPrecise();
+
     int shuffle[3];
     shuffle[shuffle_str[0] - '0'] = 0;
     shuffle[shuffle_str[1] - '0'] = 1;
@@ -2069,12 +2073,12 @@ bool combine_three_valu_op(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode
           continue;
  
        Operand operands[3];
-      bool neg[3], abs[3], clamp;
+      bool neg[3], abs[3], clamp, precise;
        uint8_t opsel = 0, omod = 0;
        if (match_op3_for_vop3(ctx, instr->opcode, op2,
                               instr.get(), swap, shuffle,
                               operands, neg, abs, &opsel,
-                             &clamp, &omod, NULL, NULL, NULL)) {
+                             &clamp, &omod, NULL, NULL, NULL, &precise)) {
           ctx.uses[instr->operands[swap].tempId()]--;
           create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
           return true;
@@ -2092,13 +2096,13 @@ bool combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposi
      * max(-min(a, b), c) -> max3(-a, -b, c) */
     for (unsigned swap = 0; swap < 2; swap++) {
        Operand operands[3];
-      bool neg[3], abs[3], clamp;
+      bool neg[3], abs[3], clamp, precise;
        uint8_t opsel = 0, omod = 0;
        bool inbetween_neg;
        if (match_op3_for_vop3(ctx, instr->opcode, opposite,
                               instr.get(), swap, "012",
                               operands, neg, abs, &opsel,
-                             &clamp, &omod, &inbetween_neg, NULL, NULL) &&
+                             &clamp, &omod, &inbetween_neg, NULL, NULL, &precise) &&
            inbetween_neg) {
           ctx.uses[instr->operands[swap].tempId()]--;
           neg[1] = true;
@@ -2378,11 +2382,17 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr,
  
     for (unsigned swap = 0; swap < 2; swap++) {
        Operand operands[3];
-      bool neg[3], abs[3], clamp;
+      bool neg[3], abs[3], clamp, precise;
        uint8_t opsel = 0, omod = 0;
        if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap,
                               "012", operands, neg, abs, &opsel,
-                             &clamp, &omod, NULL, NULL, NULL)) {
+                             &clamp, &omod, NULL, NULL, NULL, &precise)) {
+         /* max(min(src, upper), lower) returns upper if src is NaN, but
+          * med3(src, lower, upper) returns lower.
+          */
+         if (precise && instr->opcode != min)
+            continue;
+
           int const0_idx = -1, const1_idx = -1;
           uint32_t const0 = 0, const1 = 0;
           for (int i = 0; i < 3; i++) {
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp

index 2453567..2275d60 100644 (file)
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -264,3 +264,38 @@ BEGIN_TEST(optimize.bcnt)
        finish_opt_test();
     }
  END_TEST
+
+BEGIN_TEST(optimize.clamp)
+   //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
+   if (!setup_cs("v1 v1 v1", GFX9))
+      return;
+
+   //! v1: %res0 = v_med3_f32 4.0, 0, %a
+   //! p_unit_test 0, %res0
+   writeout(0, bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u),
+                        bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0])));
+
+   //! v1: %res1 = v_med3_f32 0, 4.0, %a
+   //! p_unit_test 1, %res1
+   writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u),
+                        bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0])));
+
+   /* With a precise definition the combine must keep the NaN result: min(max()) is still expected to become med3 (test 2), max(min()) is expected to stay as two instructions (test 3). */
+
+   //! v1: %res2 = v_med3_f32 4.0, 0, %a
+   //! p_unit_test 2, %res2
+   Builder::Result max = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0]);
+   max.def(0).setPrecise(true);
+   Builder::Result min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), max);
+   max.def(0).setPrecise(true); /* NOTE(review): repeats the call two lines up; possibly meant min.def(0) - confirm before changing, the expected output above may depend on it */
+   writeout(2, min);
+
+   //! v1: (precise)%res3_tmp = v_min_f32 4.0, %a
+   //! v1: %res3 = v_max_f32 0, %res3_tmp
+   //! p_unit_test 3, %res3
+   min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0]);
+   min.def(0).setPrecise(true);
+   writeout(3, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), min));
+
+   finish_opt_test();
+END_TEST
author	Rhys Perry <pendingchaos02@gmail.com>
	Wed, 7 Oct 2020 10:09:16 +0000 (11:09 +0100)
committer	Marge Bot <eric+marge@anholt.net>
	Fri, 13 Nov 2020 12:34:27 +0000 (12:34 +0000)
src/amd/compiler/aco_optimizer.cpp		patch \| blob \| history
src/amd/compiler/tests/test_optimizer.cpp		patch \| blob \| history