From 07f2ad32e4608ce535d3e9a766ffb84f5349993e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Mon, 14 Aug 2023 19:32:25 -0700
Subject: [PATCH] intel/fs: Pick the lowest register pressure schedule when spilling

We try various pre-RA scheduler modes and see if any of them allow us to
register allocate without spilling. If all of them spill, however, we
left it on the last mode: LIFO. This is unfortunately sometimes
significantly worse than other modes (such as "none").

This patch makes us instead select the pre-RA scheduling mode that gives
the lowest register pressure estimate, if none of them manage to avoid
spilling. The hope is that this scheduling will spill the least out of
all of them.

fossil-db stats (on Alchemist) speak for themselves:

   Totals:
   Instrs: 197297092 -> 195326552 (-1.00%); split: -1.02%, +0.03%
   Cycles: 14291286956 -> 14303502596 (+0.09%); split: -0.55%, +0.64%
   Spill count: 190886 -> 129204 (-32.31%); split: -33.01%, +0.70%
   Fill count: 361408 -> 225038 (-37.73%); split: -39.17%, +1.43%
   Scratch Memory Size: 12935168 -> 10868736 (-15.98%); split: -16.08%, +0.10%

   Totals from 1791 (0.27% of 668386) affected shaders:
   Instrs: 7628929 -> 5658389 (-25.83%); split: -26.50%, +0.67%
   Cycles: 719326691 -> 731542331 (+1.70%); split: -10.95%, +12.65%
   Spill count: 110627 -> 48945 (-55.76%); split: -56.96%, +1.20%
   Fill count: 221560 -> 85190 (-61.55%); split: -63.89%, +2.34%
   Scratch Memory Size: 4471808 -> 2405376 (-46.21%); split: -46.51%, +0.30%

Improves performance when using XeSS in Cyberpunk 2077 by 90% on A770.
Improves performance of Borderlands 3 by 1.54% on A770.

Reviewed-by: Emma Anholt Part-of: --- src/intel/compiler/brw_fs.cpp | 46 +++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a7f2886..80ac66c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6792,6 +6792,9 @@ fs_visitor::allocate_registers(bool allow_spilling) [SCHEDULE_NONE] = "none", }; + uint32_t best_register_pressure = UINT32_MAX; + enum instruction_scheduler_mode best_sched = SCHEDULE_NONE; + compact_virtual_grfs(); if (needs_register_pressure) @@ -6806,6 +6809,7 @@ fs_visitor::allocate_registers(bool allow_spilling) * prevent dependencies between the different scheduling modes. */ fs_inst **orig_order = save_instruction_order(cfg); + fs_inst **best_pressure_order = NULL; /* Try each scheduling heuristic to see if it can successfully register * allocate without spilling. They should be ordered by decreasing @@ -6814,12 +6818,6 @@ fs_visitor::allocate_registers(bool allow_spilling) for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) { enum instruction_scheduler_mode sched_mode = pre_modes[i]; - if (i > 0) { - /* Unless we're the first pass, reset back to the original order */ - restore_instruction_order(cfg, orig_order); - invalidate_analysis(DEPENDENCY_INSTRUCTIONS); - } - schedule_instructions(sched_mode); this->shader_stats.scheduler_mode = scheduler_mode_name[sched_mode]; @@ -6829,18 +6827,46 @@ fs_visitor::allocate_registers(bool allow_spilling) break; } - bool can_spill = allow_spilling && - (i == ARRAY_SIZE(pre_modes) - 1); - /* We should only spill registers on the last scheduling. 
*/ assert(!spilled_any_registers); - allocated = assign_regs(can_spill, spill_all); + allocated = assign_regs(false, spill_all); if (allocated) break; + + /* Save the maximum register pressure */ + uint32_t this_pressure = compute_max_register_pressure(); + + if (0) { + fprintf(stderr, "Scheduler mode \"%s\" spilled, max pressure = %u\n", + scheduler_mode_name[sched_mode], this_pressure); + } + + if (this_pressure < best_register_pressure) { + best_register_pressure = this_pressure; + best_sched = sched_mode; + delete[] best_pressure_order; + best_pressure_order = save_instruction_order(cfg); + } + + /* Reset back to the original order before trying the next mode */ + restore_instruction_order(cfg, orig_order); + invalidate_analysis(DEPENDENCY_INSTRUCTIONS); + } + + if (!allocated) { + if (0) { + fprintf(stderr, "Spilling - using lowest-pressure mode \"%s\"\n", + scheduler_mode_name[best_sched]); + } + restore_instruction_order(cfg, best_pressure_order); + shader_stats.scheduler_mode = scheduler_mode_name[best_sched]; + + allocated = assign_regs(allow_spilling, spill_all); } delete[] orig_order; + delete[] best_pressure_order; if (!allocated) { fail("Failure to register allocate. Reduce number of " -- 2.7.4