From e6ddee764eef29d3fe6c66a22de6e60299a9a36d Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Tue, 9 Nov 2021 19:03:19 -0600
Subject: [PATCH] intel/fs: Reset instruction order before re-scheduling
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The way the current scheduler loop is implemented, each scheduling pass
starts from whatever instruction order the previous pass produced.  This
means that, if PRE screws everything up badly, PRE_NON_LIFO has to try to
fix it.  It also means that tiny changes to one pass affect every later
pass.

Instead, reset the order of the instructions before each scheduling pass.
This makes the passes entirely independent of each other.

Shader-db results on Ice Lake:

total instructions in shared programs: 19670486 -> 19670648 (<.01%)
instructions in affected programs: 25317 -> 25479 (0.64%)
helped: 2
HURT: 7
helped stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4
helped stats (rel) min: 0.07% max: 0.07% x̄: 0.07% x̃: 0.07%
HURT stats (abs)   min: 8 max: 70 x̄: 24.29 x̃: 12
HURT stats (rel)   min: 0.41% max: 4.95% x̄: 1.47% x̃: 0.87%
95% mean confidence interval for instructions value: -1.28 37.28
95% mean confidence interval for instructions %-change: -0.04% 2.30%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 935535948 -> 935490243 (<.01%)
cycles in affected programs: 421994824 -> 421949119 (-0.01%)
helped: 1269
HURT: 879
helped stats (abs) min: 1 max: 12008 x̄: 259.38 x̃: 52
helped stats (rel) min: <.01% max: 28.02% x̄: 1.12% x̃: 0.14%
HURT stats (abs)   min: 1 max: 29931 x̄: 322.46 x̃: 20
HURT stats (rel)   min: <.01% max: 32.17% x̄: 1.74% x̃: 0.22%
95% mean confidence interval for cycles value: -71.37 28.81
95% mean confidence interval for cycles %-change: -0.11% 0.21%
Inconclusive result (value mean confidence interval includes 0).

total spills in shared programs: 12403 -> 12430 (0.22%)
spills in affected programs: 1355 -> 1382 (1.99%)
helped: 2
HURT: 7

total fills in shared programs: 15128 -> 15182 (0.36%)
fills in affected programs: 3294 -> 3348 (1.64%)
helped: 2
HURT: 7

LOST:   21
GAINED: 28

Reviewed-by: Ian Romanick
Part-of:
---
 src/intel/compiler/brw_fs.cpp | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index e31e94c..32adfea 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -8663,11 +8663,40 @@ fs_visitor::allocate_registers(bool allow_spilling)
 
    bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
 
+   /* Before we schedule anything, stash off the instruction order as an array
+    * of fs_inst *.  This way, we can reset it between scheduling passes to
+    * prevent dependencies between the different scheduling modes.
+    */
+   int num_insts = cfg->last_block()->end_ip + 1;
+   fs_inst **inst_arr = ralloc_array(mem_ctx, fs_inst *, num_insts);
+
+   int ip = 0;
+   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+      assert(ip >= block->start_ip && ip <= block->end_ip);
+      inst_arr[ip++] = inst;
+   }
+   assert(ip == num_insts);
+
    /* Try each scheduling heuristic to see if it can successfully register
     * allocate without spilling.  They should be ordered by decreasing
     * performance but increasing likelihood of allocating.
     */
    for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
+      if (i > 0) {
+         /* Unless we're the first pass, reset back to the original order */
+         ip = 0;
+         foreach_block (block, cfg) {
+            block->instructions.make_empty();
+
+            assert(ip == block->start_ip);
+            for (; ip <= block->end_ip; ip++)
+               block->instructions.push_tail(inst_arr[ip]);
+         }
+         assert(ip == num_insts);
+
+         invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
+      }
+
       schedule_instructions(pre_modes[i]);
       this->shader_stats.scheduler_mode = scheduler_mode_name[i];
 
-- 
2.7.4
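
For readers outside the Mesa tree, the following is a minimal, self-contained
C++ sketch of the snapshot-and-restore pattern the patch applies: capture the
instruction order once, then rebuild it before every scheduling attempt so the
passes stay independent.  The inst type, run_pass(), and schedule_all() below
are illustrative placeholders, not the fs backend's real API.

   #include <list>
   #include <vector>

   struct inst { int id; };

   /* Stand-in for one scheduling heuristic that may reorder the list. */
   static void run_pass(std::list<inst *> &insts, int mode)
   {
      if (mode & 1)
         insts.reverse();   /* pretend the heuristic reordered things */
   }

   static void schedule_all(std::list<inst *> &insts,
                            const std::vector<int> &modes)
   {
      /* Snapshot the original order once, as the patch does with inst_arr. */
      const std::vector<inst *> order(insts.begin(), insts.end());

      for (size_t i = 0; i < modes.size(); i++) {
         if (i > 0) {
            /* Every pass after the first starts from the original order,
             * not from whatever the previous pass left behind.
             */
            insts.assign(order.begin(), order.end());
         }
         run_pass(insts, modes[i]);
      }
   }

With this structure, changing or reordering one entry in modes cannot change
the input that any other pass sees, which is the independence property the
commit message describes.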