--- /dev/null
+/*
+ * Copyright 2023 Alyssa Rosenzweig
+ * Copyright 2022 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* Bottom-up local scheduler to reduce register pressure */
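+/*
+ * For each block we build a dependency DAG (SSA read-after-write edges plus
+ * conservative ordering edges for memory, coverage, and preload
+ * instructions), then greedily pick ready instructions bottom-up, taking
+ * whichever choice gives the best register pressure delta. The new order is
+ * applied only if its estimated maximum pressure beats the original order's,
+ * so by its own estimate the pass never makes a block worse.
+ */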
+
+#include "util/dag.h"
+#include "agx_compiler.h"
+#include "agx_opcodes.h"
+
+struct sched_ctx {
+ /* Dependency graph */
+ struct dag *dag;
+
+ /* Live set */
+ BITSET_WORD *live;
+};
+
+struct sched_node {
+ struct dag_node dag;
+
+ /* Instruction this node represents */
+ agx_instr *instr;
+};
+
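+/*
+ * Make node a depend on node b, keeping a after b in the final order. A NULL
+ * node means there is no instruction to depend on, and no edge is added.
+ */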
+static void
+add_dep(struct sched_node *a, struct sched_node *b)
+{
+ assert(a != b && "no self-dependencies");
+
+ if (a && b)
+ dag_add_edge(&a->dag, &b->dag, 0);
+}
+
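+/*
+ * Like add_dep, but also records a as the new tail of the serialized chain
+ * tracked by *b, so that later instructions in the same class depend on a.
+ */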
+static void
+serialize(struct sched_node *a, struct sched_node **b)
+{
+ add_dep(a, *b);
+ *b = a;
+}
+
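+/*
+ * Build the dependency DAG for a block: SSA sources depend on their defining
+ * writes, and memory, coverage, and preload instructions get conservative
+ * ordering edges so the scheduler cannot reorder them unsafely. Control flow
+ * at the end of the block is left alone.
+ */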
+static struct dag *
+create_dag(agx_context *ctx, agx_block *block, void *memctx)
+{
+ struct dag *dag = dag_create(memctx);
+
+ struct sched_node **last_write =
+ calloc(ctx->alloc, sizeof(struct sched_node *));
+ struct sched_node *coverage = NULL;
+ struct sched_node *preload = NULL;
+
+ /* Last memory load, to serialize stores against */
+ struct sched_node *memory_load = NULL;
+
+ /* Last memory store, to serialize loads and stores against */
+ struct sched_node *memory_store = NULL;
+
+ agx_foreach_instr_in_block(block, I) {
+ /* Don't touch control flow */
+ if (I->op == AGX_OPCODE_LOGICAL_END)
+ break;
+
+ struct sched_node *node = rzalloc(memctx, struct sched_node);
+ node->instr = I;
+ dag_init_node(dag, &node->dag);
+
+ /* Reads depend on writes, no other hazards in SSA */
+ agx_foreach_ssa_src(I, s) {
+ add_dep(node, last_write[I->src[s].value]);
+ }
+
+ agx_foreach_ssa_dest(I, d) {
+ assert(I->dest[d].value < ctx->alloc);
+ last_write[I->dest[d].value] = node;
+ }
+
+ /* Classify the instruction and add dependencies according to the class */
+ enum agx_schedule_class dep = agx_opcodes_info[I->op].schedule_class;
+ assert(dep != AGX_SCHEDULE_CLASS_INVALID && "invalid instruction seen");
+
+ bool barrier = dep == AGX_SCHEDULE_CLASS_BARRIER;
+
+ if (dep == AGX_SCHEDULE_CLASS_STORE)
+ add_dep(node, memory_load);
+ else if (dep == AGX_SCHEDULE_CLASS_ATOMIC || barrier)
+ serialize(node, &memory_load);
+
+ if (dep == AGX_SCHEDULE_CLASS_LOAD || dep == AGX_SCHEDULE_CLASS_STORE ||
+ dep == AGX_SCHEDULE_CLASS_ATOMIC || barrier)
+ serialize(node, &memory_store);
+
+ if (dep == AGX_SCHEDULE_CLASS_COVERAGE || barrier)
+ serialize(node, &coverage);
+
+ if (dep == AGX_SCHEDULE_CLASS_PRELOAD)
+ serialize(node, &preload);
+ else
+ add_dep(node, preload);
+ }
+
+ free(last_write);
+
+ return dag;
+}
+
+/*
+ * Calculate the change in register pressure from scheduling a given
+ * instruction. Equivalently, calculate the difference in the number of live
+ * registers before and after the instruction, given the live set after the
+ * instruction. This calculation follows immediately from the dataflow
+ * definition of liveness:
+ *
+ * live_in = (live_out - KILL) + GEN
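+ *
+ * For example, scheduling "c = fadd a, b" bottom-up when only c is live
+ * below it removes c's registers from the live set and adds those of a and
+ * b, so the delta is positive (pressure is higher above the instruction than
+ * below it) whenever the sources together need more registers than the
+ * destination.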
+ */
+static signed
+calculate_pressure_delta(agx_instr *I, BITSET_WORD *live)
+{
+ signed delta = 0;
+
+ /* Destinations must be unique */
+ agx_foreach_ssa_dest(I, d) {
+ if (BITSET_TEST(live, I->dest[d].value))
+ delta -= agx_write_registers(I, d);
+ }
+
+ agx_foreach_ssa_src(I, src) {
+ /* Filter duplicates */
+ bool dupe = false;
+
+ for (unsigned i = 0; i < src; ++i) {
+ if (agx_is_equiv(I->src[i], I->src[src])) {
+ dupe = true;
+ break;
+ }
+ }
+
+ if (!dupe && !BITSET_TEST(live, I->src[src].value))
+ delta += agx_read_registers(I, src);
+ }
+
+ return delta;
+}
+
+/*
+ * Choose the next instruction, bottom-up. For now we use a simple greedy
+ * heuristic: choose the instruction that has the best effect on liveness.
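+ *
+ * Ties are broken by taking the first ready instruction that achieves the
+ * minimum pressure delta, since the comparison below is strict.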
+ */
+static struct sched_node *
+choose_instr(struct sched_ctx *s)
+{
+ int32_t min_delta = INT32_MAX;
+ struct sched_node *best = NULL;
+
+ list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
+ int32_t delta = calculate_pressure_delta(n->instr, s->live);
+
+ if (delta < min_delta) {
+ best = n;
+ min_delta = delta;
+ }
+ }
+
+ return best;
+}
+
+static void
+pressure_schedule_block(agx_context *ctx, agx_block *block, struct sched_ctx *s)
+{
+ /* Pressure is off by a constant (the block's live-out registers); that's
+  * fine, since we only compare the two estimates against each other.
+  */
+ signed pressure = 0;
+ signed orig_max_pressure = 0;
+ unsigned nr_ins = 0;
+
+ memcpy(s->live, block->live_out,
+ BITSET_WORDS(ctx->alloc) * sizeof(BITSET_WORD));
+
+ agx_foreach_instr_in_block_rev(block, I) {
+ pressure += calculate_pressure_delta(I, s->live);
+ orig_max_pressure = MAX2(pressure, orig_max_pressure);
+ agx_liveness_ins_update(s->live, I);
+ nr_ins++;
+ }
+
+ memcpy(s->live, block->live_out,
+ BITSET_WORDS(ctx->alloc) * sizeof(BITSET_WORD));
+
+ /* Off by the same constant as the original estimate, so comparable */
+ signed max_pressure = 0;
+ pressure = 0;
+
+ struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
+ nr_ins = 0;
+
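+ /* Pick instructions bottom-up: each pick is placed last among the
+  * remaining instructions, so schedule[] is built in reverse program order.
+  */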
+ while (!list_is_empty(&s->dag->heads)) {
+ struct sched_node *node = choose_instr(s);
+ pressure += calculate_pressure_delta(node->instr, s->live);
+ max_pressure = MAX2(pressure, max_pressure);
+ dag_prune_head(s->dag, &node->dag);
+
+ schedule[nr_ins++] = node;
+ agx_liveness_ins_update(s->live, node->instr);
+ }
+
+ /* Bail if it looks like it's worse */
+ if (max_pressure >= orig_max_pressure) {
+ free(schedule);
+ return;
+ }
+
+ /* Apply the schedule. schedule[] is in reverse program order, so
+  * prepending each instruction at the head of the block restores forward
+  * order, ahead of any control flow left at the end of the block.
+  */
+ for (unsigned i = 0; i < nr_ins; ++i) {
+ agx_remove_instruction(schedule[i]->instr);
+ list_add(&schedule[i]->instr->link, &block->instructions);
+ }
+
+ free(schedule);
+}
+
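+/*
+ * Schedule every block in the shader. Liveness is computed up front because
+ * the per-block scheduling relies on each block's live-out set.
+ */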
+void
+agx_pressure_schedule(agx_context *ctx)
+{
+ agx_compute_liveness(ctx);
+ void *memctx = ralloc_context(ctx);
+ BITSET_WORD *live =
+ ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->alloc));
+
+ agx_foreach_block(ctx, block) {
+ struct sched_ctx sctx = {
+ .dag = create_dag(ctx, block, memctx),
+ .live = live,
+ };
+
+ pressure_schedule_block(ctx, block, &sctx);
+ }
+
+ /* Clean up after liveness analysis */
+ agx_foreach_instr_global(ctx, I) {
+ agx_foreach_ssa_src(I, s)
+ I->src[s].kill = false;
+ }
+
+ ralloc_free(memctx);
+}