nir/inline_uniforms: support loop
authorQiang Yu <yuq825@gmail.com>
Mon, 19 Jul 2021 06:25:36 +0000 (14:25 +0800)
committerMarge Bot <eric+marge@anholt.net>
Thu, 19 Aug 2021 02:17:35 +0000 (02:17 +0000)
Be able to inline uniforms in loop for unrolling it.
Nested loop/if is also supported.

Some example:

    for (i = 0; i < count; i++)
...

uniform "count" will be inlined. But note this does not
make sure the loop will be unrolled (ie. count = 1000).

    for (i = 0; i < count; i++)
        for (j = init; j < 10; j++)
            if (type == 2)
                ...

uniform "count", "init" and "type" will be inlined.

It is intentional to not be too aggressive to add uniforms
to avoid false positive case while be able to support most
common usage.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11950>

src/compiler/nir/nir_inline_uniforms.c

index 1a732e2..652b953 100644 (file)
  * effectively inlining uniform values in the IR. The main benefit is register
  * usage decrease leading to better SMT (hyperthreading). It's accomplished
  * by targetting uniforms that determine whether a conditional branch is
- * taken.
+ * taken or a loop can be unrolled.
  *
- * Only uniforms used in if conditions are analyzed.
+ * Only uniforms used in these places are analyzed:
+ *   1. if condition
+ *   2. loop terminator
+ *   3. init and update value of induction variable used in loop terminator
  *
  * nir_find_inlinable_uniforms finds uniforms that can be inlined and stores
  * that information in shader_info.
@@ -37,7 +40,8 @@
  * (uniforms must be lowered to load_ubo before calling this)
  */
 
-#include "compiler/nir/nir_builder.h"
+#include "nir_builder.h"
+#include "nir_loop_analyze.h"
 
 /* Maximum value in shader_info::inlinable_uniform_dw_offsets[] */
 #define MAX_OFFSET (UINT16_MAX * 4)
@@ -131,14 +135,84 @@ src_only_uses_uniforms(const nir_src *src, int component,
    }
 }
 
+static bool
+is_induction_variable(const nir_src *src, int component, nir_loop_info *info,
+                      uint32_t *uni_offsets, unsigned *num_offsets)
+{
+   if (!src->is_ssa)
+      return false;
+
+   assert(component < src->ssa->num_components);
+
+   /* Return true for induction variable (ie. i in for loop) */
+   for (int i = 0; i < info->num_induction_vars; i++) {
+      nir_loop_induction_variable *var = info->induction_vars + i;
+      if (var->def == src->ssa) {
+         /* Induction variable should have constant initial value (ie. i = 0),
+          * constant update value (ie. i++) and constant end condition
+          * (ie. i < 10), so that we know the exact loop count for unrolling
+          * the loop.
+          *
+          * Add uniforms need to be inlined for this induction variable's
+          * initial and update value to be constant, for example:
+          *
+          *     for (i = init; i < count; i += step)
+          *
+          * We collect uniform "init" and "step" here.
+          */
+         if (var->init_src) {
+            if (!src_only_uses_uniforms(var->init_src, component,
+                                        uni_offsets, num_offsets))
+               return false;
+         }
+
+         if (var->update_src) {
+            nir_alu_src *alu_src = var->update_src;
+            if (!src_only_uses_uniforms(&alu_src->src,
+                                        alu_src->swizzle[component],
+                                        uni_offsets, num_offsets))
+               return false;
+         }
+
+         return true;
+      }
+   }
+
+   return false;
+}
+
 static void
-add_inlinable_uniforms(const nir_src *cond, uint32_t *uni_offsets,
-                       unsigned *num_offsets)
+add_inlinable_uniforms(const nir_src *cond, nir_loop_info *info,
+                       uint32_t *uni_offsets, unsigned *num_offsets)
 {
    unsigned new_num = *num_offsets;
    /* If condition SSA is always scalar, so component is 0. */
    unsigned component = 0;
 
+   /* Allow induction variable which means a loop terminator. */
+   if (info) {
+      nir_ssa_scalar cond_scalar = {cond->ssa, 0};
+
+      /* Limit terminator condition to loop unroll support case which is a simple
+       * comparison (ie. "i < count" is supported, but "i + 1 < count" is not).
+       */
+      if (nir_is_supported_terminator_condition(cond_scalar)) {
+         nir_alu_instr *alu = nir_instr_as_alu(cond->ssa->parent_instr);
+
+         /* One side of comparison is induction variable, the other side is
+          * only uniform.
+          */
+         for (int i = 0; i < 2; i++) {
+            if (is_induction_variable(&alu->src[i].src, alu->src[i].swizzle[0],
+                                      info, uni_offsets, &new_num)) {
+               cond = &alu->src[1 - i].src;
+               component = alu->src[1 - i].swizzle[0];
+               break;
+            }
+         }
+      }
+   }
+
    /* Only update uniform number when all uniforms in the expression
     * can be inlined. Partially inline uniforms can't lower if/loop.
     *
@@ -163,6 +237,82 @@ add_inlinable_uniforms(const nir_src *cond, uint32_t *uni_offsets,
       *num_offsets = new_num;
 }
 
+static void
+process_node(nir_cf_node *node, nir_loop_info *info,
+             uint32_t *uni_offsets, unsigned *num_offsets)
+{
+   switch (node->type) {
+   case nir_cf_node_if: {
+      nir_if *if_node = nir_cf_node_as_if(node);
+      const nir_src *cond = &if_node->condition;
+      add_inlinable_uniforms(cond, info, uni_offsets, num_offsets);
+
+      /* Do not pass loop info down so only alow induction variable
+       * in loop terminator "if":
+       *
+       *     for (i = 0; true; i++)
+       *         if (i == count)
+       *             if (i == num)
+       *                 <no break>
+       *             break
+       *
+       * so "num" won't be inlined due to the "if" is not a
+       * terminator.
+       */
+      info = NULL;
+
+      foreach_list_typed(nir_cf_node, nested_node, node, &if_node->then_list)
+         process_node(nested_node, info, uni_offsets, num_offsets);
+      foreach_list_typed(nir_cf_node, nested_node, node, &if_node->else_list)
+         process_node(nested_node, info, uni_offsets, num_offsets);
+      break;
+   }
+
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(node);
+
+      /* Replace loop info, no nested loop info currently:
+       *
+       *     for (i = 0; i < count0; i++)
+       *         for (j = 0; j < count1; j++)
+       *             if (i == num)
+       *
+       * so "num" won't be inlined due to "i" is an induction
+       * variable of upper loop.
+       */
+      info = loop->info;
+
+      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) {
+         bool is_terminator = false;
+         list_for_each_entry(nir_loop_terminator, terminator,
+                             &info->loop_terminator_list,
+                             loop_terminator_link) {
+            if (nested_node == &terminator->nif->cf_node) {
+               is_terminator = true;
+               break;
+            }
+         }
+
+         /* Allow induction variables for terminator "if" only:
+          *
+          *     for (i = 0; i < count; i++)
+          *         if (i == num)
+          *             <no break>
+          *
+          * so "num" won't be inlined due to the "if" is not a
+          * terminator.
+          */
+         nir_loop_info *use_info = is_terminator ? info : NULL;
+         process_node(nested_node, use_info, uni_offsets, num_offsets);
+      }
+      break;
+   }
+
+   default:
+      break;
+   }
+}
+
 void
 nir_find_inlinable_uniforms(nir_shader *shader)
 {
@@ -171,22 +321,10 @@ nir_find_inlinable_uniforms(nir_shader *shader)
 
    nir_foreach_function(function, shader) {
       if (function->impl) {
-         foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
-            switch (node->type) {
-            case nir_cf_node_if: {
-               const nir_src *cond = &nir_cf_node_as_if(node)->condition;
-               add_inlinable_uniforms(cond, uni_offsets, &num_offsets);
-               break;
-            }
-
-            case nir_cf_node_loop:
-               /* TODO: handle loops if we want to unroll them at draw time */
-               break;
+         nir_metadata_require(function->impl, nir_metadata_loop_analysis, nir_var_all);
 
-            default:
-               break;
-            }
-         }
+         foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
+            process_node(node, NULL, uni_offsets, &num_offsets);
       }
    }