nir/gcm: fix pushing instructions into if blocks
authorTimothy Arceri <tarceri@itsqueeze.com>
Mon, 7 Feb 2022 08:10:13 +0000 (19:10 +1100)
committerMarge Bot <emma+marge@anholt.net>
Tue, 31 May 2022 01:03:43 +0000 (01:03 +0000)
The previous logic would just set the block to the instructions
original location if we couldn't evict it from a loop.

For now we only push const loads to a later block inside ifs
but we can add more heuristics later. This change helps a
hand full of shaders but also stops a CTS regression caused
by excess spilling after a series I'm working on to disable
more of the GLSL IR optimisation passes.

Shader-db results iris (BDW):

total instructions in shared programs: 17529759 -> 17529749 (<.01%)
instructions in affected programs: 15929 -> 15919 (-0.06%)
helped: 5
HURT: 2
helped stats (abs) min: 1 max: 5 x̄: 2.40 x̃: 2
helped stats (rel) min: 0.06% max: 0.15% x̄: 0.11% x̃: 0.12%
HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 0.06% max: 0.06% x̄: 0.06% x̃: 0.06%
95% mean confidence interval for instructions value: -3.34 0.49
95% mean confidence interval for instructions %-change: -0.14% 0.02%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 861109994 -> 861099681 (<.01%)
cycles in affected programs: 7027698 -> 7017385 (-0.15%)
helped: 95
HURT: 72
helped stats (abs) min: 1 max: 7995 x̄: 138.54 x̃: 9
helped stats (rel) min: <.01% max: 15.96% x̄: 0.54% x̃: 0.11%
HURT stats (abs)   min: 1 max: 474 x̄: 39.56 x̃: 12
HURT stats (rel)   min: <.01% max: 1.17% x̄: 0.20% x̃: 0.11%
95% mean confidence interval for cycles value: -159.05 35.54
95% mean confidence interval for cycles %-change: -0.45% 0.01%
Inconclusive result (value mean confidence interval includes 0).

total spills in shared programs: 17606 -> 17605 (<.01%)
spills in affected programs: 323 -> 322 (-0.31%)
helped: 1
HURT: 0

total fills in shared programs: 22599 -> 22598 (<.01%)
fills in affected programs: 1348 -> 1347 (-0.07%)
helped: 1
HURT: 0

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14940>

src/compiler/nir/nir_opt_gcm.c

index 694ee64..5550cf2 100644 (file)
@@ -49,6 +49,9 @@ struct gcm_block_info {
    /* Number of loops this block is inside */
    unsigned loop_depth;
 
+   /* Number of ifs this block is inside */
+   unsigned if_depth;
+
    unsigned loop_instr_count;
 
    /* The loop the block is nested inside or NULL */
@@ -128,13 +131,14 @@ get_loop_instr_count(struct exec_list *cf_list)
 /* Recursively walks the CFG and builds the block_info structure */
 static void
 gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
-                     nir_loop *loop, unsigned loop_depth,
+                     nir_loop *loop, unsigned loop_depth, unsigned if_depth,
                      unsigned loop_instr_count)
 {
    foreach_list_typed(nir_cf_node, node, node, cf_list) {
       switch (node->type) {
       case nir_cf_node_block: {
          nir_block *block = nir_cf_node_as_block(node);
+         state->blocks[block->index].if_depth = if_depth;
          state->blocks[block->index].loop_depth = loop_depth;
          state->blocks[block->index].loop_instr_count = loop_instr_count;
          state->blocks[block->index].loop = loop;
@@ -142,13 +146,15 @@ gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
       }
       case nir_cf_node_if: {
          nir_if *if_stmt = nir_cf_node_as_if(node);
-         gcm_build_block_info(&if_stmt->then_list, state, loop, loop_depth, ~0u);
-         gcm_build_block_info(&if_stmt->else_list, state, loop, loop_depth, ~0u);
+         gcm_build_block_info(&if_stmt->then_list, state, loop, loop_depth,
+                              if_depth + 1, ~0u);
+         gcm_build_block_info(&if_stmt->else_list, state, loop, loop_depth,
+                              if_depth + 1, ~0u);
          break;
       }
       case nir_cf_node_loop: {
          nir_loop *loop = nir_cf_node_as_loop(node);
-         gcm_build_block_info(&loop->body, state, loop, loop_depth + 1,
+         gcm_build_block_info(&loop->body, state, loop, loop_depth + 1, if_depth,
                               get_loop_instr_count(&loop->body));
          break;
       }
@@ -528,20 +534,72 @@ set_block_for_loop_instr(struct gcm_state *state, nir_instr *instr,
    return false;
 }
 
+static bool
+set_block_to_if_block(struct gcm_state *state,  nir_instr *instr,
+                      nir_block *block)
+{
+   if (instr->type == nir_instr_type_load_const)
+      return true;
+
+   /* TODO: Figure out some more heuristics to allow more to be moved into
+    * if-statements.
+    */
+
+   return false;
+}
+
 static nir_block *
 gcm_choose_block_for_instr(nir_instr *instr, nir_block *early_block,
                            nir_block *late_block, struct gcm_state *state)
 {
    assert(nir_block_dominates(early_block, late_block));
 
+   bool block_set = false;
+
+   /* First see if we can push the instruction down into an if-statements block */
    nir_block *best = late_block;
    for (nir_block *block = late_block; block != NULL; block = block->imm_dom) {
+      if (state->blocks[block->index].loop_depth >
+          state->blocks[instr->block->index].loop_depth)
+         continue;
+
+      if (state->blocks[block->index].if_depth >=
+          state->blocks[best->index].if_depth &&
+          set_block_to_if_block(state, instr, block)) {
+            /* If we are pushing the instruction into an if we want it to be
+             * in the earliest block not the latest to avoid creating register
+             * pressure issues. So we don't break unless we come across the
+             * block the instruction was originally in.
+             */
+            best = block;
+            block_set = true;
+            if (block == instr->block)
+               break;
+      } else if (block == instr->block) {
+         /* If we couldn't push the instruction later just put is back where it
+          * was previously.
+          */
+         if (!block_set)
+            best = block;
+         break;
+      }
+
+      if (block == early_block)
+         break;
+   }
+
+   /* Now see if we can evict the instruction from a loop */
+   for (nir_block *block = late_block; block != NULL; block = block->imm_dom) {
       if (state->blocks[block->index].loop_depth <
-          state->blocks[best->index].loop_depth &&
-          set_block_for_loop_instr(state, instr, block))
-         best = block;
-      else if (block == instr->block)
-         best = block;
+          state->blocks[best->index].loop_depth) {
+         if (set_block_for_loop_instr(state, instr, block)) {
+            best = block;
+         } else if (block == instr->block) {
+            if (!block_set)
+               best = block;
+            break;
+         }
+      }
 
       if (block == early_block)
          break;
@@ -751,7 +809,7 @@ opt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number)
    exec_list_make_empty(&state.instrs);
    state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);
 
-   gcm_build_block_info(&impl->body, &state, NULL, 0, ~0u);
+   gcm_build_block_info(&impl->body, &state, NULL, 0, 0, ~0u);
 
    gcm_pin_instructions(impl, &state);