agx: Schedule for register pressure

author Alyssa Rosenzweig <alyssa@rosenzweig.io>

Sat, 15 Apr 2023 23:49:47 +0000 (19:49 -0400)

committer Marge Bot <emma+marge@anholt.net>

Tue, 5 Sep 2023 18:50:34 +0000 (18:50 +0000)
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Sat, 15 Apr 2023 23:49:47 +0000 (19:49 -0400)
committer Marge Bot <emma+marge@anholt.net>
Tue, 5 Sep 2023 18:50:34 +0000 (18:50 +0000)
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c

index 83e7ba2..f0807d6 100644 (file)
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -32,6 +32,7 @@ static const struct debug_named_value agx_debug_options[] = {
     {"wait",      AGX_DBG_WAIT,      "Wait after all async instructions"},
     {"nopreamble",AGX_DBG_NOPREAMBLE,"Do not use shader preambles"},
     {"demand",    AGX_DBG_DEMAND,    "Bound tightly to register demand"},
+   {"nosched",   AGX_DBG_NOSCHED,   "Do not schedule the shader"},
     DEBUG_NAMED_VALUE_END
  };
  /* clang-format on */
@@ -2560,6 +2561,14 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
     if (agx_should_dump(nir, AGX_DBG_SHADERS))
        agx_print_shader(ctx, stdout);
  
+   if (likely(!(agx_compiler_debug & AGX_DBG_NOSCHED))) {
+      agx_pressure_schedule(ctx);
+      agx_validate(ctx, "Pre-RA scheduler");
+   }
+
+   if (agx_should_dump(nir, AGX_DBG_SHADERS))
+      agx_print_shader(ctx, stdout);
+
     agx_ra(ctx);
     agx_lower_64bit_postra(ctx);
  
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h

index 1378b5e..3246819 100644 (file)
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -759,6 +759,7 @@ void agx_lower_pseudo(agx_context *ctx);
  void agx_lower_uniform_sources(agx_context *ctx);
  void agx_opt_cse(agx_context *ctx);
  void agx_dce(agx_context *ctx, bool partial);
+void agx_pressure_schedule(agx_context *ctx);
  void agx_ra(agx_context *ctx);
  void agx_lower_64bit_postra(agx_context *ctx);
  void agx_insert_waits(agx_context *ctx);
diff --git a/src/asahi/compiler/agx_debug.h b/src/asahi/compiler/agx_debug.h

index c066491..9ff6b4c 100644 (file)
--- a/src/asahi/compiler/agx_debug.h
+++ b/src/asahi/compiler/agx_debug.h
@@ -25,6 +25,7 @@ enum agx_compiler_dbg {
     AGX_DBG_WAIT        = BITFIELD_BIT(7),
     AGX_DBG_NOPREAMBLE  = BITFIELD_BIT(8),
     AGX_DBG_DEMAND      = BITFIELD_BIT(9),
+   AGX_DBG_NOSCHED     = BITFIELD_BIT(10),
  };
  /* clang-format on */
  
diff --git a/src/asahi/compiler/agx_pressure_schedule.c b/src/asahi/compiler/agx_pressure_schedule.c

new file mode 100644 (file)

index 0000000..4ae25f1
--- /dev/null
+++ b/src/asahi/compiler/agx_pressure_schedule.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2023 Alyssa Rosenzweig
+ * Copyright 2022 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* Bottom-up local scheduler to reduce register pressure */
+
+#include "util/dag.h"
+#include "agx_compiler.h"
+#include "agx_opcodes.h"
+
+struct sched_ctx {
+   /* Dependency graph */
+   struct dag *dag;
+
+   /* Live set */
+   BITSET_WORD *live;
+};
+
+struct sched_node {
+   struct dag_node dag;
+
+   /* Instruction this node represents */
+   agx_instr *instr;
+};
+
+static void
+add_dep(struct sched_node *a, struct sched_node *b)
+{
+   assert(a != b && "no self-dependencies");
+
+   if (a && b)
+      dag_add_edge(&a->dag, &b->dag, 0);
+}
+
+static void
+serialize(struct sched_node *a, struct sched_node **b)
+{
+   add_dep(a, *b);
+   *b = a;
+}
+
+static struct dag *
+create_dag(agx_context *ctx, agx_block *block, void *memctx)
+{
+   struct dag *dag = dag_create(ctx);
+
+   struct sched_node **last_write =
+      calloc(ctx->alloc, sizeof(struct sched_node *));
+   struct sched_node *coverage = NULL;
+   struct sched_node *preload = NULL;
+
+   /* Last memory load, to serialize stores against */
+   struct sched_node *memory_load = NULL;
+
+   /* Last memory store, to serialize loads and stores against */
+   struct sched_node *memory_store = NULL;
+
+   agx_foreach_instr_in_block(block, I) {
+      /* Don't touch control flow */
+      if (I->op == AGX_OPCODE_LOGICAL_END)
+         break;
+
+      struct sched_node *node = rzalloc(memctx, struct sched_node);
+      node->instr = I;
+      dag_init_node(dag, &node->dag);
+
+      /* Reads depend on writes, no other hazards in SSA */
+      agx_foreach_ssa_src(I, s) {
+         add_dep(node, last_write[I->src[s].value]);
+      }
+
+      agx_foreach_ssa_dest(I, d) {
+         assert(I->dest[d].value < ctx->alloc);
+         last_write[I->dest[d].value] = node;
+      }
+
+      /* Classify the instruction and add dependencies according to the class */
+      enum agx_schedule_class dep = agx_opcodes_info[I->op].schedule_class;
+      assert(dep != AGX_SCHEDULE_CLASS_INVALID && "invalid instruction seen");
+
+      bool barrier = dep == AGX_SCHEDULE_CLASS_BARRIER;
+
+      if (dep == AGX_SCHEDULE_CLASS_STORE)
+         add_dep(node, memory_load);
+      else if (dep == AGX_SCHEDULE_CLASS_ATOMIC || barrier)
+         serialize(node, &memory_load);
+
+      if (dep == AGX_SCHEDULE_CLASS_LOAD || dep == AGX_SCHEDULE_CLASS_STORE ||
+          dep == AGX_SCHEDULE_CLASS_ATOMIC || barrier)
+         serialize(node, &memory_store);
+
+      if (dep == AGX_SCHEDULE_CLASS_COVERAGE || barrier)
+         serialize(node, &coverage);
+
+      if (dep == AGX_SCHEDULE_CLASS_PRELOAD)
+         serialize(node, &preload);
+      else
+         add_dep(node, preload);
+   }
+
+   free(last_write);
+
+   return dag;
+}
+
+/*
+ * Calculate the change in register pressure from scheduling a given
+ * instruction. Equivalently, calculate the difference in the number of live
+ * registers before and after the instruction, given the live set after the
+ * instruction. This calculation follows immediately from the dataflow
+ * definition of liveness:
+ *
+ *      live_in = (live_out - KILL) + GEN
+ */
+static signed
+calculate_pressure_delta(agx_instr *I, BITSET_WORD *live)
+{
+   signed delta = 0;
+
+   /* Destinations must be unique */
+   agx_foreach_ssa_dest(I, d) {
+      if (BITSET_TEST(live, I->dest[d].value))
+         delta -= agx_write_registers(I, d);
+   }
+
+   agx_foreach_ssa_src(I, src) {
+      /* Filter duplicates */
+      bool dupe = false;
+
+      for (unsigned i = 0; i < src; ++i) {
+         if (agx_is_equiv(I->src[i], I->src[src])) {
+            dupe = true;
+            break;
+         }
+      }
+
+      if (!dupe && !BITSET_TEST(live, I->src[src].value))
+         delta += agx_read_registers(I, src);
+   }
+
+   return delta;
+}
+
+/*
+ * Choose the next instruction, bottom-up. For now we use a simple greedy
+ * heuristic: choose the instruction that has the best effect on liveness.
+ */
+static struct sched_node *
+choose_instr(struct sched_ctx *s)
+{
+   int32_t min_delta = INT32_MAX;
+   struct sched_node *best = NULL;
+
+   list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
+      int32_t delta = calculate_pressure_delta(n->instr, s->live);
+
+      if (delta < min_delta) {
+         best = n;
+         min_delta = delta;
+      }
+   }
+
+   return best;
+}
+
+static void
+pressure_schedule_block(agx_context *ctx, agx_block *block, struct sched_ctx *s)
+{
+   /* off by a constant, that's ok */
+   signed pressure = 0;
+   signed orig_max_pressure = 0;
+   unsigned nr_ins = 0;
+
+   memcpy(s->live, block->live_out,
+          BITSET_WORDS(ctx->alloc) * sizeof(BITSET_WORD));
+
+   agx_foreach_instr_in_block_rev(block, I) {
+      pressure += calculate_pressure_delta(I, s->live);
+      orig_max_pressure = MAX2(pressure, orig_max_pressure);
+      agx_liveness_ins_update(s->live, I);
+      nr_ins++;
+   }
+
+   memcpy(s->live, block->live_out,
+          BITSET_WORDS(ctx->alloc) * sizeof(BITSET_WORD));
+
+   /* off by a constant, that's ok */
+   signed max_pressure = 0;
+   pressure = 0;
+
+   struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
+   nr_ins = 0;
+
+   while (!list_is_empty(&s->dag->heads)) {
+      struct sched_node *node = choose_instr(s);
+      pressure += calculate_pressure_delta(node->instr, s->live);
+      max_pressure = MAX2(pressure, max_pressure);
+      dag_prune_head(s->dag, &node->dag);
+
+      schedule[nr_ins++] = node;
+      agx_liveness_ins_update(s->live, node->instr);
+   }
+
+   /* Bail if it looks like it's worse */
+   if (max_pressure >= orig_max_pressure) {
+      free(schedule);
+      return;
+   }
+
+   /* Apply the schedule */
+   for (unsigned i = 0; i < nr_ins; ++i) {
+      agx_remove_instruction(schedule[i]->instr);
+      list_add(&schedule[i]->instr->link, &block->instructions);
+   }
+
+   free(schedule);
+}
+
+void
+agx_pressure_schedule(agx_context *ctx)
+{
+   agx_compute_liveness(ctx);
+   void *memctx = ralloc_context(ctx);
+   BITSET_WORD *live =
+      ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->alloc));
+
+   agx_foreach_block(ctx, block) {
+      struct sched_ctx sctx = {
+         .dag = create_dag(ctx, block, memctx),
+         .live = live,
+      };
+
+      pressure_schedule_block(ctx, block, &sctx);
+   }
+
+   /* Clean up after liveness analysis */
+   agx_foreach_instr_global(ctx, I) {
+      agx_foreach_ssa_src(I, s)
+         I->src[s].kill = false;
+   }
+
+   ralloc_free(memctx);
+}
diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build

index e3e2730..171c2de 100644 (file)
--- a/src/asahi/compiler/meson.build
+++ b/src/asahi/compiler/meson.build
@@ -24,6 +24,7 @@ libasahi_agx_files = files(
    'agx_pack.c',
    'agx_performance.c',
    'agx_print.c',
+  'agx_pressure_schedule.c',
    'agx_ir.c',
    'agx_opt_cse.c',
    'agx_opt_empty_else.c',
author	Alyssa Rosenzweig <alyssa@rosenzweig.io>
	Sat, 15 Apr 2023 23:49:47 +0000 (19:49 -0400)
committer	Marge Bot <emma+marge@anholt.net>
	Tue, 5 Sep 2023 18:50:34 +0000 (18:50 +0000)
src/asahi/compiler/agx_compile.c		patch \| blob \| history
src/asahi/compiler/agx_compiler.h		patch \| blob \| history
src/asahi/compiler/agx_debug.h		patch \| blob \| history
src/asahi/compiler/agx_pressure_schedule.c	[new file with mode: 0644]	patch \| blob
src/asahi/compiler/meson.build		patch \| blob \| history