aco: try forming clauses even if reg_pressure exceeds
authorDaniel Schürmann <daniel@schuermann.dev>
Wed, 19 May 2021 07:23:20 +0000 (09:23 +0200)
committerMarge Bot <eric+marge@anholt.net>
Mon, 27 Sep 2021 14:29:16 +0000 (14:29 +0000)
This patch allows to form clauses even if the register pressure
is at the limit with the effect that VMEM instructions are less
scattered after the first clause in a Block.
It respects the previous clause size to avoid excessive moving
of VMEM instructions.
VMEM_CLAUSE_MAX_GRAB_DIST is further reduced to compensate
some of the effects.

Totals from 28922 (19.26% of 150170) affected shaders: (GFX10.3)
VGPRs: 1546568 -> 1523072 (-1.52%); split: -1.52%, +0.00%
CodeSize: 117524892 -> 117510288 (-0.01%); split: -0.08%, +0.07%
MaxWaves: 605554 -> 611120 (+0.92%)
Instrs: 22292568 -> 22291927 (-0.00%); split: -0.10%, +0.09%
Latency: 488975399 -> 490230904 (+0.26%); split: -0.06%, +0.32%
InvThroughput: 117842300 -> 116521653 (-1.12%); split: -1.15%, +0.03%
VClause: 541550 -> 522464 (-3.52%); split: -9.73%, +6.20%
SClause: 718185 -> 718298 (+0.02%); split: -0.00%, +0.02%
Copies: 1420603 -> 1386949 (-2.37%); split: -2.64%, +0.27%
Branches: 559559 -> 559278 (-0.05%); split: -0.06%, +0.01%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10896>

src/amd/compiler/aco_scheduler.cpp

index 42b8308..bfa08f5 100644 (file)
@@ -37,7 +37,7 @@
 #define SMEM_MAX_MOVES      (64 - ctx.num_waves * 4)
 #define VMEM_MAX_MOVES      (256 - ctx.num_waves * 16)
 /* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
-#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
+#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
 #define POS_EXP_MAX_MOVES         512
 
 namespace aco {
@@ -788,6 +788,7 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
    int window_size = VMEM_WINDOW_SIZE;
    int max_moves = VMEM_MAX_MOVES;
    int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
+   bool only_clauses = false;
    int16_t k = 0;
 
    /* first, check if we have instructions before current to move down */
@@ -827,7 +828,23 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
 
       /* if current depends on candidate, add additional dependencies and continue */
       bool can_move_down = !is_vmem || part_of_clause || candidate->definitions.empty();
-
+      if (only_clauses) {
+         /* In case of high register pressure, only try to form clauses,
+          * and only if the previous clause is not larger
+          * than the current one will be.
+          */
+         if (part_of_clause) {
+            int clause_size = cursor.insert_idx - cursor.insert_idx_clause;
+            int prev_clause_size = 1;
+            while (should_form_clause(current,
+                                      block->instructions[candidate_idx - prev_clause_size].get()))
+               prev_clause_size++;
+            if (prev_clause_size > clause_size + 1)
+               break;
+         } else {
+            can_move_down = false;
+         }
+      }
       HazardResult haz =
          perform_hazard_query(part_of_clause ? &clause_hq : &indep_hq, candidate.get(), false);
       if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
@@ -856,7 +873,13 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
          ctx.mv.downwards_skip(cursor);
          continue;
       } else if (res == move_fail_pressure) {
-         break;
+         only_clauses = true;
+         if (part_of_clause)
+            break;
+         add_to_hazard_query(&indep_hq, candidate.get());
+         add_to_hazard_query(&clause_hq, candidate.get());
+         ctx.mv.downwards_skip(cursor);
+         continue;
       }
       if (part_of_clause)
          add_to_hazard_query(&indep_hq, candidate_ptr);