nir/loop_analyze: Determine iteration counts for more kinds of loops
author Yevhenii Kolesnikov <yevhenii.kolesnikov@globallogic.com>
Fri, 17 Jan 2020 11:01:01 +0000 (13:01 +0200)
committer Marge Bot <emma+marge@anholt.net>
Thu, 6 Apr 2023 23:50:27 +0000 (23:50 +0000)
If the loop iterator is incremented with something other than regular
addition, it is more error prone to calculate the number of iterations
theoretically. What we can do instead is emulate the loop and determine
the number of iterations empirically.

These operations are covered:
 - imul
 - fmul
 - ishl
 - ishr
 - ushr

Also add unit tests for loop unrolling.

Improves performance of Aztec Ruins (sixonix
gfxbench5.aztec_ruins_vk_high) by -1.28042% +/- 0.498555% (N=5) on Intel
Arc A770.

v2 (idr): Rebase on 3 years. :( Use nir_phi_instr_add_src in the test
cases.

v3 (idr): Use try_eval_const_alu to evaluate the loop termination
condition in get_iteration_empirical. Also restructure the loop
slightly. This fixed off-by-one iteration errors in "inverted" loop
tests (e.g., nir_loop_analyze_test.ushr_ieq_known_count_invert_31).

v4 (idr): Use try_eval_const_alu to evaluate the induction variable
update in get_iteration_empirical. This fixes non-commutative update
operations (e.g., shifts) when the induction variable is not the first
source. This fixes the unit test
nir_loop_analyze_test.ishl_rev_ieq_infinite_loop_unknown_count.

v5 (idr): Fix _type parameter for fadd and fadd_rev loop unroll
tests. Hopefully that fixes the failure on s390x. Temporarily disable
fmul. This works around the problem revealed in
glsl-fs-loop-unroll-mul-fp64, and there were no shader-db or fossil-db
changes.

v6 (idr): Plumb max_unroll_iterations into get_iteration_empirical. I
was going to do this, but I forgot. Suggested by Tim.

v7 (idr): Disable fadd tests on s390x. They fail because S390 is weird.

Almost all of the shaders affected (OpenGL or Vulkan) are from gfxbench
or geekbench. A couple shaders in Deus Ex (OpenGL), Dirt Rally (OpenGL),
Octopath Traveler (Vulkan), and Rise of the Tomb Raider (Vulkan) are
helped.

The lost / gained shaders in OpenGL are an Aztec Ruins shader that goes
from SIMD16 to SIMD8. The spills / fills affected are in a single Aztec
Ruins (Vulkan) compute shader.

shader-db results:

Skylake, Ice Lake, and Tiger Lake had similar results. (Tiger Lake shown)
total loops in shared programs: 5514 -> 5470 (-0.80%)
loops in affected programs: 62 -> 18 (-70.97%)
helped: 37 / HURT: 0

LOST:   2
GAINED: 2

Haswell and Broadwell had similar results. (Broadwell shown)
total loops in shared programs: 5346 -> 5298 (-0.90%)
loops in affected programs: 66 -> 18 (-72.73%)
helped: 39 / HURT: 0

fossil-db results:

Skylake, Ice Lake, and Tiger Lake had similar results. (Tiger Lake shown)
Instructions in all programs: 157374679 -> 157397421 (+0.0%)
Instructions hurt: 28

SENDs in all programs: 7463800 -> 7467639 (+0.1%)
SENDs hurt: 28

Loops in all programs: 38980 -> 38950 (-0.1%)
Loops helped: 28

Cycles in all programs: 7559486451 -> 7557455384 (-0.0%)
Cycles helped: 28

Spills in all programs: 11405 -> 11403 (-0.0%)
Spills helped: 1

Fills in all programs: 19578 -> 19588 (+0.1%)
Fills hurt: 1

Lost: 1

Signed-off-by: Yevhenii Kolesnikov <yevhenii.kolesnikov@globallogic.com>
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3445>

src/compiler/nir/meson.build
src/compiler/nir/nir_loop_analyze.c
src/compiler/nir/tests/loop_analyze_tests.cpp
src/compiler/nir/tests/loop_unroll_tests.cpp [new file with mode: 0644]

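diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 1c82ea7..8ecba3b 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build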
@@ -411,6 +411,7 @@ if with_tests
         'tests/control_flow_tests.cpp',
         'tests/core_tests.cpp',
         'tests/loop_analyze_tests.cpp',
+        'tests/loop_unroll_tests.cpp',
         'tests/lower_alu_width_tests.cpp',
         'tests/lower_returns_tests.cpp',
         'tests/mod_analysis_tests.cpp',
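diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index b263028..861e7b0 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c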
@@ -907,6 +907,43 @@ get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
    return iter_u64 > INT_MAX ? -1 : (int)iter_u64;
 }
 
+static int32_t
+get_iteration_empirical(nir_alu_instr *cond_alu, nir_alu_instr *incr_alu,
+                        nir_ssa_def *basis, nir_const_value initial,
+                        bool invert_cond, unsigned execution_mode,
+                        unsigned max_unroll_iterations)
+{
+   int iter_count = 0;
+   nir_const_value result;
+   nir_const_value iter = initial;
+
+   const nir_ssa_def *originals[2] = { basis, NULL };
+   const nir_const_value *replacements[2] = { &iter, NULL };
+
+   while (iter_count <= max_unroll_iterations) {
+      bool success;
+
+      success = try_eval_const_alu(&result, cond_alu, originals, replacements,
+                                   1, execution_mode);
+      if (!success)
+         return -1;
+
+      const bool cond_succ = invert_cond ? !result.b : result.b;
+      if (cond_succ)
+         return iter_count;
+
+      iter_count++;
+
+      success = try_eval_const_alu(&result, incr_alu, originals, replacements,
+                                   1, execution_mode);
+      assert(success);
+
+      iter = result;
+   }
+
+   return -1;
+}
+
 static bool
 will_break_on_first_iteration(nir_alu_instr *cond_alu, nir_ssa_def *basis,
                               nir_ssa_def *limit_basis,
@@ -980,7 +1017,8 @@ calculate_iterations(nir_ssa_def *basis, nir_ssa_def *limit_basis,
                      nir_const_value initial, nir_const_value step,
                      nir_const_value limit, nir_alu_instr *alu,
                      nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs,
-                     bool invert_cond, unsigned execution_mode)
+                     bool invert_cond, unsigned execution_mode,
+                     unsigned max_unroll_iterations)
 {
    /* nir_op_isub should have been lowered away by this point */
    assert(alu->op != nir_op_isub);
@@ -1027,6 +1065,12 @@ calculate_iterations(nir_ssa_def *basis, nir_ssa_def *limit_basis,
       return 0;
    }
 
+   /* For loops incremented with addition operation, it's easy to
+    * calculate the number of iterations theoretically. Even though it
+    * is possible for other operations as well, it is much more error
+    * prone, and doesn't cover all possible cases. So, we try to
+    * emulate the loop.
+    */
    int iter_int;
    switch (alu->op) {
    case nir_op_iadd:
@@ -1037,12 +1081,20 @@ calculate_iterations(nir_ssa_def *basis, nir_ssa_def *limit_basis,
       iter_int = get_iteration(alu_op, initial, step, limit, bit_size,
                                execution_mode);
       break;
-   case nir_op_imul:
    case nir_op_fmul:
+      /* Detecting non-zero loop counts when the loop increment is floating
+       * point multiplication triggers a preexisting problem in
+       * glsl-fs-loop-unroll-mul-fp64.shader_test. See
+       * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3445#note_1779438.
+       */
+      return -1;
+   case nir_op_imul:
    case nir_op_ishl:
    case nir_op_ishr:
    case nir_op_ushr:
-      return -1;
+      return get_iteration_empirical(cond_alu, alu, basis, initial,
+                                     invert_cond, execution_mode,
+                                     max_unroll_iterations);
    default:
       unreachable("Invalid induction variable increment operation.");
    }
@@ -1206,7 +1258,8 @@ try_find_trip_count_vars_in_iand(nir_ssa_scalar *cond,
  * loop.
  */
 static void
-find_trip_count(loop_info_state *state, unsigned execution_mode)
+find_trip_count(loop_info_state *state, unsigned execution_mode,
+                unsigned max_unroll_iterations)
 {
    bool trip_count_known = true;
    bool guessed_trip_count = false;
@@ -1329,7 +1382,8 @@ find_trip_count(loop_info_state *state, unsigned execution_mode)
                                             cond,
                                             alu_op, limit_rhs,
                                             invert_cond,
-                                            execution_mode);
+                                            execution_mode,
+                                            max_unroll_iterations);
 
       /* Where we not able to calculate the iteration count */
       if (iterations == -1) {
@@ -1488,7 +1542,9 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl)
       return;
 
    /* Run through each of the terminators and try to compute a trip-count */
-   find_trip_count(state, impl->function->shader->info.float_controls_execution_mode);
+   find_trip_count(state,
+                   impl->function->shader->info.float_controls_execution_mode,
+                   impl->function->shader->options->max_unroll_iterations);
 
    nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
       nir_foreach_instr(instr, block) {
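diff --git a/src/compiler/nir/tests/loop_analyze_tests.cpp b/src/compiler/nir/tests/loop_analyze_tests.cpp
index 7cd3fdd..6f79a0a 100644
--- a/src/compiler/nir/tests/loop_analyze_tests.cpp
+++ b/src/compiler/nir/tests/loop_analyze_tests.cpp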
@@ -36,7 +36,10 @@ nir_loop_analyze_test::nir_loop_analyze_test()
 {
    glsl_type_singleton_init_or_ref();
 
-   static const nir_shader_compiler_options options = { };
+   static nir_shader_compiler_options options = { };
+
+   options.max_unroll_iterations = 32;
+
    b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options,
                                       "loop analyze");
 }
@@ -917,6 +920,7 @@ COMPARE_REVERSE(ilt)
 COMPARE_REVERSE(ige)
 COMPARE_REVERSE(ult)
 COMPARE_REVERSE(uge)
+COMPARE_REVERSE(ishl)
 
 #define INOT_COMPARE(comp)                                              \
    static nir_ssa_def *                                                 \
@@ -1195,7 +1199,7 @@ INFINITE_LOOP_UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000003, 0x40000000, uge,
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, ine, ushr)
+KNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, ine, ushr, 1)
 
 /*    uint i = 0x80000000;
  *    while (true) {
@@ -1205,7 +1209,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, ine, ushr)
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x80000000, 0x00000000, 0x00000001, ieq, ushr)
+KNOWN_COUNT_TEST(0x80000000, 0x00000000, 0x00000001, ieq, ushr, 32)
 
 /*    uint i = 0x80000000;
  *    while (true) {
@@ -1215,7 +1219,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0x00000000, 0x00000001, ieq, ushr)
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x80000000, 0x00000002, 0x00000001, ult, ushr)
+KNOWN_COUNT_TEST(0x80000000, 0x00000002, 0x00000001, ult, ushr, 31)
 
 /*    uint i = 0x80000000;
  *    while (true) {
@@ -1245,7 +1249,7 @@ KNOWN_COUNT_TEST(0x80000000, 0x80000000, 0x00000001, uge, ushr, 0)
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x80000000, 0x00008000, 0x00000001, uge_rev, ushr)
+KNOWN_COUNT_TEST(0x80000000, 0x00008000, 0x00000001, uge_rev, ushr, 16)
 
 /*    uint i = 0x80000000;
  *    while (true) {
@@ -1265,7 +1269,7 @@ KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x80000000, ine, ushr, 0)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000000, ieq, ushr)
+KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000000, ieq, ushr, 31)
 
 /*    uint i = 0x80000000;
  *    while (true) {
@@ -1305,7 +1309,7 @@ KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000000, uge, ushr, 0)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000008, uge_rev, ushr)
+KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0x00000008, uge_rev, ushr, 27)
 
 /*    int i = 0xffffffff;
  *    while (true) {
@@ -1445,7 +1449,7 @@ KNOWN_COUNT_TEST(0x7fffffff, 0x00000000, 0x00000001, ine, ishr, 0)
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x40000000, 0x00000001, 0x00000001, ieq, ishr)
+KNOWN_COUNT_TEST(0x40000000, 0x00000001, 0x00000001, ieq, ishr, 30)
 
 /*    int i = 0x7fffffff;
  *    while (true) {
@@ -1455,7 +1459,7 @@ UNKNOWN_COUNT_TEST(0x40000000, 0x00000001, 0x00000001, ieq, ishr)
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
+KNOWN_COUNT_TEST(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr, 31)
 
 /*    int i = 0x80000000;
  *    while (true) {
@@ -1465,7 +1469,7 @@ UNKNOWN_COUNT_TEST(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x80000000, 0xffff0000, 0x00000001, ilt_rev, ishr)
+KNOWN_COUNT_TEST(0x80000000, 0xffff0000, 0x00000001, ilt_rev, ishr, 16)
 
 /*    int i = 0x80000000;
  *    while (true) {
@@ -1475,7 +1479,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0xffff0000, 0x00000001, ilt_rev, ishr)
  *       i >>= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x80000000, 0xffffffff, 0x00000001, ige, ishr)
+KNOWN_COUNT_TEST(0x80000000, 0xffffffff, 0x00000001, ige, ishr, 31)
 
 /*    int i = 0x12345678;
  *    while (true) {
@@ -1485,7 +1489,7 @@ UNKNOWN_COUNT_TEST(0x80000000, 0xffffffff, 0x00000001, ige, ishr)
  *       i >>= 4;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x12345678, 0x00000001, 0x00000004, ige_rev, ishr)
+KNOWN_COUNT_TEST(0x12345678, 0x00000001, 0x00000004, ige_rev, ishr, 7)
 
 /*    int i = 0x7fffffff;
  *    while (true) {
@@ -1505,7 +1509,7 @@ KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ine, ishr, 0)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ieq, ishr)
+KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ieq, ishr, 30)
 
 /*    int i = 0x7fffffff;
  *    while (true) {
@@ -1515,7 +1519,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000000, ieq, ishr)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
+KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr, 30)
 
 /*    int i = 0x80000000;
  *    while (true) {
@@ -1525,7 +1529,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishr)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0xfffffffe, ilt_rev, ishr)
+KNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0xfffffffe, ilt_rev, ishr, 30)
 
 /*    int i = 0xbfffffff;
  *    while (true) {
@@ -1535,7 +1539,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x80000000, 0x00000001, 0xfffffffe, ilt_rev, ishr)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0xbfffffff, 0x00000001, 0xfffffffe, ige, ishr)
+KNOWN_COUNT_TEST_INVERT(0xbfffffff, 0x00000001, 0xfffffffe, ige, ishr, 29)
 
 /*    int i = 0x7fffffff;
  *    while (true) {
@@ -1545,7 +1549,7 @@ UNKNOWN_COUNT_TEST_INVERT(0xbfffffff, 0x00000001, 0xfffffffe, ige, ishr)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000002, ige_rev, ishr)
+KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000002, ige_rev, ishr, 29)
 
 /*    int i = 0;
  *    while (true) {
@@ -1675,7 +1679,7 @@ INFINITE_LOOP_UNKNOWN_COUNT_TEST_INVERT(0x77777777, 0x00000004, 0xffffffff, ige_
  *       i <<= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ine, ishl)
+KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ine, ishl, 1)
 
 /*    int i = 1;
  *    while (true) {
@@ -1685,7 +1689,17 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ine, ishl)
  *       i <<= 4;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x00000001, 0x00001000, 0x00000004, ieq, ishl)
+KNOWN_COUNT_TEST(0x00000001, 0x00001000, 0x00000004, ieq, ishl, 3)
+
+/*    uint i = 1;
+ *    while (true) {
+ *       if (i < 1)
+ *          break;
+ *
+ *       i <<= 1;
+ *    }
+ */
+KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ult, ishl, 32)
 
 /*    int i = 1;
  *    while (true) {
@@ -1695,7 +1709,7 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00001000, 0x00000004, ieq, ishl)
  *       i <<= 1;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ilt, ishl)
+KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ilt, ishl, 31)
 
 /*    int i = 0xffff0000;
  *    while (true) {
@@ -1705,7 +1719,7 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000001, ilt, ishl)
  *       i <<= 2;
  *    }
  */
-UNKNOWN_COUNT_TEST(0xffff0000, 0xffffffff, 0x00000002, ilt_rev, ishl)
+KNOWN_COUNT_TEST(0xffff0000, 0xffffffff, 0x00000002, ilt_rev, ishl, 8)
 
 /*    int i = 0xf;
  *    while (true) {
@@ -1715,7 +1729,7 @@ UNKNOWN_COUNT_TEST(0xffff0000, 0xffffffff, 0x00000002, ilt_rev, ishl)
  *       i <<= 3;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x00000003, ige, ishl)
+KNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x00000003, ige, ishl, 5)
 
 /*    int i = 0x0000000f;
  *    while (true) {
@@ -1725,7 +1739,7 @@ UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x00000003, ige, ishl)
  *       i <<= 4;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x0000000f, 0xfffd0000, 0x00000004, ige_rev, ishl)
+KNOWN_COUNT_TEST(0x0000000f, 0xfffd0000, 0x00000004, ige_rev, ishl, 7)
 
 /*    int i = 1;
  *    while (true) {
@@ -1735,7 +1749,7 @@ UNKNOWN_COUNT_TEST(0x0000000f, 0xfffd0000, 0x00000004, ige_rev, ishl)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000001, 0x00000002, ine, ishl)
+KNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000001, 0x00000002, ine, ishl, 1)
 
 /*    int i = 1;
  *    while (true) {
@@ -1745,7 +1759,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000001, 0x00000002, ine, ishl)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000008, 0x01000000, ieq, ishl)
+KNOWN_COUNT_TEST_INVERT(0x00000001, 0x00000008, 0x01000000, ieq, ishl, 2)
 
 /*    int i = 0x7fffffff;
  *    while (true) {
@@ -1765,7 +1779,7 @@ KNOWN_COUNT_TEST_INVERT(0x7fffffff, 0x00000001, 0x00000001, ilt, ishl, 0)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x00007fff, 0x00000002, 0x1fffffff, ilt_rev, ishl)
+KNOWN_COUNT_TEST_INVERT(0x00007fff, 0x00000002, 0x1fffffff, ilt_rev, ishl, 7)
 
 /*    int i = 0xffff7fff;
  *    while (true) {
@@ -1775,7 +1789,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00007fff, 0x00000002, 0x1fffffff, ilt_rev, ishl)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x00000004, 0xfffffffe, ige, ishl)
+KNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x00000004, 0xfffffffe, ige, ishl, 3)
 
 /*    int i = 0x0000f0f0;
  *    while (true) {
@@ -1785,7 +1799,21 @@ UNKNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x00000004, 0xfffffffe, ige, ishl)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x0000f0f0, 0x00000004, 0xfffffffe, ige_rev, ishl)
+KNOWN_COUNT_TEST_INVERT(0x0000f0f0, 0x00000004, 0xfffffffe, ige_rev, ishl, 3)
+
+/* This infinite loop makes no sense, but it's a good test to make sure the
+ * loop analysis code doesn't incorrectly treat left-shift as a commutative
+ * operation.
+ *
+ *    int i = 1;
+ *    while (true) {
+ *       if (i == 0)
+ *          break;
+ *
+ *       i = 1 << i;
+ *    }
+ */
+INFINITE_LOOP_UNKNOWN_COUNT_TEST(0x00000001, 0x00000000, 0x00000001, ieq, ishl_rev)
 
 /*    int i = 0;
  *    while (true) {
@@ -1882,7 +1910,7 @@ INFINITE_LOOP_UNKNOWN_COUNT_TEST_INVERT(0x88888888, 0x00000010, 0x00000001, ige,
  *       i = i * 7;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000007, ine, imul)
+KNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000007, ine, imul, 1)
 
 /*    int i = 2;
  *    while (true) {
@@ -1892,7 +1920,7 @@ UNKNOWN_COUNT_TEST(0x00000001, 0x00000001, 0x00000007, ine, imul)
  *       i = i * 3;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x00000002, 0x00000036, 0x00000003, ieq, imul)
+KNOWN_COUNT_TEST(0x00000002, 0x00000036, 0x00000003, ieq, imul, 3)
 
 /*    int i = 5;
  *    while (true) {
@@ -1902,7 +1930,7 @@ UNKNOWN_COUNT_TEST(0x00000002, 0x00000036, 0x00000003, ieq, imul)
  *       i = i * -3;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x00000005, 0x00000001, 0xfffffffd, ilt, imul)
+KNOWN_COUNT_TEST(0x00000005, 0x00000001, 0xfffffffd, ilt, imul, 1)
 
 /*    int i = 0xf;
  *    while (true) {
@@ -1912,7 +1940,7 @@ UNKNOWN_COUNT_TEST(0x00000005, 0x00000001, 0xfffffffd, ilt, imul)
  *       i = i * 11;
  *    }
  */
-UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x0000000b, ige, imul)
+KNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x0000000b, ige, imul, 4)
 
 /*    int i = 3;
  *    while (true) {
@@ -1922,7 +1950,7 @@ UNKNOWN_COUNT_TEST(0x0000000f, 0x0000ffff, 0x0000000b, ige, imul)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffffb, 0xfffffff1, ine, imul)
+KNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffffb, 0xfffffff1, ine, imul, 1)
 
 /*    int i = 3;
  *    while (true) {
@@ -1932,7 +1960,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffffb, 0xfffffff1, ine, imul)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffff9, 0x000562b3, ieq, imul)
+KNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffff9, 0x000562b3, ieq, imul, 5)
 
 /*    int i = 0x7f;
  *    while (true) {
@@ -1942,7 +1970,7 @@ UNKNOWN_COUNT_TEST_INVERT(0x00000003, 0xfffffff9, 0x000562b3, ieq, imul)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0x0000007f, 0x00000003, 0x00000001, ilt, imul)
+KNOWN_COUNT_TEST_INVERT(0x0000007f, 0x00000003, 0x00000001, ilt, imul, 16)
 
 /*    int i = 0xffff7fff;
  *    while (true) {
@@ -1952,4 +1980,4 @@ UNKNOWN_COUNT_TEST_INVERT(0x0000007f, 0x00000003, 0x00000001, ilt, imul)
  *          break;
  *    }
  */
-UNKNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x0000000f, 0x34cce9b0, ige, imul)
+KNOWN_COUNT_TEST_INVERT(0xffff7fff, 0x0000000f, 0x34cce9b0, ige, imul, 4)
diff --git a/src/compiler/nir/tests/loop_unroll_tests.cpp b/src/compiler/nir/tests/loop_unroll_tests.cpp
new file mode 100644
index 0000000..7260909
--- /dev/null
+++ b/src/compiler/nir/tests/loop_unroll_tests.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "gtest/gtest.h"
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_phi_builder.h"
+
+#define UNROLL_TEST_INSERT(_label, _type, _init, _limit, _step,         \
+                           _cond, _incr, _rev, _exp_res,                \
+                           _exp_instr_count, _exp_loop_count)           \
+   TEST_F(nir_loop_unroll_test, _label)                                 \
+   {                                                                    \
+      nir_ssa_def *init = nir_imm_##_type(&bld, _init);                 \
+      nir_ssa_def *limit = nir_imm_##_type(&bld, _limit);               \
+      nir_ssa_def *step = nir_imm_##_type(&bld, _step);                 \
+      loop_unroll_test_helper(&bld, init, limit, step,                  \
+                              &nir_##_cond, &nir_##_incr, _rev);        \
+      EXPECT_##_exp_res(nir_opt_loop_unroll(bld.shader));               \
+      EXPECT_EQ(_exp_instr_count, count_instr(nir_op_##_incr));         \
+      EXPECT_EQ(_exp_loop_count, count_loops());                        \
+   }
+
+namespace {
+
+class nir_loop_unroll_test : public ::testing::Test {
+protected:
+   nir_loop_unroll_test()
+   {
+      glsl_type_singleton_init_or_ref();
+      static nir_shader_compiler_options options = { };
+      options.max_unroll_iterations = 32;
+      options.force_indirect_unrolling_sampler = false;
+      options.force_indirect_unrolling = nir_var_all;
+      bld = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options,
+                                           "loop unrolling tests");
+   }
+   ~nir_loop_unroll_test()
+   {
+      ralloc_free(bld.shader);
+      glsl_type_singleton_decref();
+   }
+
+   int count_instr(nir_op op);
+   int count_loops(void);
+
+   nir_builder bld;
+};
+
+} /* namespace */
+
+int
+nir_loop_unroll_test::count_instr(nir_op op)
+{
+   int count = 0;
+   nir_foreach_block(block, bld.impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_alu)
+            continue;
+         nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
+         if (alu_instr->op == op)
+            count++;
+      }
+   }
+
+   return count;
+}
+
+int
+nir_loop_unroll_test::count_loops(void)
+{
+   int count = 0;
+   foreach_list_typed(nir_cf_node, cf_node, node, &bld.impl->body) {
+      if (cf_node->type == nir_cf_node_loop)
+         count++;
+   }
+
+   return count;
+}
+
+void
+loop_unroll_test_helper(nir_builder *bld, nir_ssa_def *init,
+                        nir_ssa_def *limit, nir_ssa_def *step,
+                        nir_ssa_def* (*cond_instr)(nir_builder*,
+                                                   nir_ssa_def*,
+                                                   nir_ssa_def*),
+                        nir_ssa_def* (*incr_instr)(nir_builder*,
+                                                   nir_ssa_def*,
+                                                   nir_ssa_def*),
+                        bool reverse)
+{
+   nir_loop *loop = nir_push_loop(bld);
+
+   nir_block *top_block =
+      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+   nir_block *head_block = nir_loop_first_block(loop);
+
+   nir_phi_instr *phi = nir_phi_instr_create(bld->shader);
+   nir_ssa_dest_init(&phi->instr, &phi->dest, 1, 32, NULL);
+
+   nir_phi_instr_add_src(phi, top_block, nir_src_for_ssa(init));
+
+   nir_ssa_def *cond = cond_instr(bld,
+                                  (reverse ? limit : &phi->dest.ssa),
+                                  (reverse ? &phi->dest.ssa : limit));
+
+   nir_if *nif = nir_push_if(bld, cond);
+   nir_jump(bld, nir_jump_break);
+   nir_pop_if(bld, nif);
+
+   nir_ssa_def *var = incr_instr(bld, &phi->dest.ssa, step);
+
+   nir_phi_instr_add_src(phi, nir_cursor_current_block(bld->cursor),
+                         nir_src_for_ssa(var));
+
+   nir_pop_loop(bld, loop);
+
+   bld->cursor = nir_after_phis(head_block);
+   nir_builder_instr_insert(bld, &phi->instr);
+
+   nir_validate_shader(bld->shader, NULL);
+}
+
+UNROLL_TEST_INSERT(iadd,     int,   0,     24,   4,
+                   ige,      iadd,  false, TRUE, 6, 0)
+UNROLL_TEST_INSERT(iadd_rev, int,   0,     24,   4,
+                   ilt,      iadd,  true,  TRUE, 7, 0)
+#ifndef __s390x__
+UNROLL_TEST_INSERT(fadd,     float, 0.0,   24.0, 4.0,
+                   fge,      fadd,  false, TRUE, 6, 0)
+UNROLL_TEST_INSERT(fadd_rev, float, 0.0,   24.0, 4.0,
+                   flt,      fadd,  true,  TRUE, 7, 0)
+#endif
+UNROLL_TEST_INSERT(imul,     int,   1,     81,   3,
+                   ige,      imul,  false, TRUE, 4, 0)
+UNROLL_TEST_INSERT(imul_rev, int,   1,     81,   3,
+                   ilt,      imul,  true,  TRUE, 5, 0)
+#if 0 /* Disable tests until support is re-enabled in loop_analyze. */
+UNROLL_TEST_INSERT(fmul,     float, 1.5,   81.0, 3.0,
+                   fge,      fmul,  false, TRUE, 4, 0)
+UNROLL_TEST_INSERT(fmul_rev, float, 1.0,   81.0, 3.0,
+                   flt,      fmul,  true,  TRUE, 5, 0)
+#endif
+UNROLL_TEST_INSERT(ishl,     int,   1,     128,  1,
+                   ige,      ishl,  false, TRUE, 7, 0)
+UNROLL_TEST_INSERT(ishl_rev, int,   1,     128,  1,
+                   ilt,      ishl,  true,  TRUE, 8, 0)
+UNROLL_TEST_INSERT(ishr,     int,   64,    4,    1,
+                   ilt,      ishr,  false, TRUE, 5, 0)
+UNROLL_TEST_INSERT(ishr_rev, int,   64,    4,    1,
+                   ige,      ishr,  true,  TRUE, 4, 0)
+UNROLL_TEST_INSERT(ushr,     int,   64,    4,    1,
+                   ilt,      ushr,  false, TRUE, 5, 0)
+UNROLL_TEST_INSERT(ushr_rev, int,   64,    4,    1,
+                   ige,      ushr,  true,  TRUE, 4, 0)
+
+UNROLL_TEST_INSERT(lshl_neg,     int,  0xf0f0f0f0, 0,    1,
+                   ige,          ishl, false,      TRUE, 4, 0)
+UNROLL_TEST_INSERT(lshl_neg_rev, int,  0xf0f0f0f0, 0,    1,
+                   ilt,          ishl, true,       TRUE, 4, 0)