aco: ignore precise flag when optimizing integer clamps

author Rhys Perry <pendingchaos02@gmail.com>

Fri, 29 Apr 2022 16:23:20 +0000 (17:23 +0100)

committer Marge Bot <emma+marge@anholt.net>

Tue, 5 Jul 2022 16:39:56 +0000 (16:39 +0000)
author Rhys Perry <pendingchaos02@gmail.com>
Fri, 29 Apr 2022 16:23:20 +0000 (17:23 +0100)
committer Marge Bot <emma+marge@anholt.net>
Tue, 5 Jul 2022 16:39:56 +0000 (16:39 +0000)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp

index 71497cb..26cb5ca 100644 (file)
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2851,7 +2851,8 @@ combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode min, aco_opc
           /* max(min(src, upper), lower) returns upper if src is NaN, but
            * med3(src, lower, upper) returns lower.
            */
-         if (precise && instr->opcode != min)
+         if (precise && instr->opcode != min &&
+             (min == aco_opcode::v_min_f16 || min == aco_opcode::v_min_f32))
              continue;
  
           int const0_idx = -1, const1_idx = -1;
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp

index 1684b9a..8afb56c 100644 (file)
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -530,21 +530,22 @@ BEGIN_TEST(optimize.clamp)
                             bld.vop2(cfg.max, bld.def(v1), inputs[2], inputs[0])));
  
        /* correct NaN behaviour with precise */
-
-      //! v1: %res7 = @med3 @ub, @lb, %a
-      //! p_unit_test 7, %res7
-      Builder::Result max = bld.vop2(cfg.max, bld.def(v1), cfg.lb, inputs[0]);
-      max.def(0).setPrecise(true);
-      Builder::Result min = bld.vop2(cfg.min, bld.def(v1), cfg.ub, max);
-      max.def(0).setPrecise(true);
-      writeout(7, min);
-
-      //! v1: (precise)%res8_tmp = @min @ub, %a
-      //! v1: %res8 = @max @lb, %res8_tmp
-      //! p_unit_test 8, %res8
-      min = bld.vop2(cfg.min, bld.def(v1), cfg.ub, inputs[0]);
-      min.def(0).setPrecise(true);
-      writeout(8, bld.vop2(cfg.max, bld.def(v1), cfg.lb, min));
+      if (cfg.min == aco_opcode::v_min_f16 || cfg.min == aco_opcode::v_min_f32) {
+         //~f(16|32)! v1: %res7 = @med3 @ub, @lb, %a
+         //~f(16|32)! p_unit_test 7, %res7
+         Builder::Result max = bld.vop2(cfg.max, bld.def(v1), cfg.lb, inputs[0]);
+         max.def(0).setPrecise(true);
+         Builder::Result min = bld.vop2(cfg.min, bld.def(v1), cfg.ub, max);
+         max.def(0).setPrecise(true);
+         writeout(7, min);
+
+         //~f(16|32)! v1: (precise)%res8_tmp = @min @ub, %a
+         //~f(16|32)! v1: %res8 = @max @lb, %res8_tmp
+         //~f(16|32)! p_unit_test 8, %res8
+         min = bld.vop2(cfg.min, bld.def(v1), cfg.ub, inputs[0]);
+         min.def(0).setPrecise(true);
+         writeout(8, bld.vop2(cfg.max, bld.def(v1), cfg.lb, min));
+      }
  
        finish_opt_test();
     }
author	Rhys Perry <pendingchaos02@gmail.com>
	Fri, 29 Apr 2022 16:23:20 +0000 (17:23 +0100)
committer	Marge Bot <emma+marge@anholt.net>
	Tue, 5 Jul 2022 16:39:56 +0000 (16:39 +0000)
src/amd/compiler/aco_optimizer.cpp		patch | blob | history
src/amd/compiler/tests/test_optimizer.cpp		patch | blob | history