ir3/ra: Add a validation pass
authorConnor Abbott <cwabbott0@gmail.com>
Mon, 26 Apr 2021 21:05:53 +0000 (23:05 +0200)
committerEmma Anholt <emma@anholt.net>
Thu, 10 Jun 2021 19:24:06 +0000 (12:24 -0700)
This helps catch tricky-to-debug bugs in RA, or helps rule them out.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9842>

src/freedreno/ir3/ir3_ra.c
src/freedreno/ir3/ir3_ra.h
src/freedreno/ir3/ir3_ra_validate.c [new file with mode: 0644]
src/freedreno/ir3/meson.build

index a8a523f..34f828f 100644 (file)
@@ -1936,6 +1936,8 @@ ir3_ra(struct ir3_shader_variant *v)
 
        handle_block(ctx, ir3_start_block(v->ir));
 
+       ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count);
+
        /* Strip array-ness and SSA-ness at the end, because various helpers still
         * need to work even on definitions that have already been assigned. For
         * example, we need to preserve array-ness so that array live-ins have the
index 672b536..7e5b063 100644 (file)
@@ -187,6 +187,9 @@ void ir3_calc_pressure(struct ir3_shader_variant *v,
                                           struct ir3_liveness *live,
                                           struct ir3_pressure *max_pressure);
 
+void ir3_ra_validate(struct ir3_shader_variant *v,
+                                        unsigned full_size, unsigned half_size, unsigned block_count);
+
 void ir3_lower_copies(struct ir3_shader_variant *v);
 
 /* Register interval datastructure
diff --git a/src/freedreno/ir3/ir3_ra_validate.c b/src/freedreno/ir3/ir3_ra_validate.c
new file mode 100644 (file)
index 0000000..b9d3e81
--- /dev/null
@@ -0,0 +1,556 @@
+/*
+ * Copyright (C) 2021 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "ir3_ra.h"
+#include "ir3_shader.h"
+
+/* This file implements a validation pass for register allocation. We check
+ * that the assignment of SSA values to registers is "valid", in the sense
+ * that each original definition reaches all of its uses without being
+ * clobbered by something else.
+ *
+ * The validation is a forward dataflow analysis. The state at each point
+ * consists of, for each physical register, the SSA value occupying it, or a
+ * few special values:
+ *
+ * - "unknown" is set initially, before the dataflow analysis assigns it a
+ *   value. This is the lattice bottom.
+ * - Values at the start get "undef", which acts like a special SSA value that
+ *   indicates it is never written.
+ * - "overdefined" registers are set to more than one value, depending on
+ *   which path you take to get to the spot. This is the lattice top.
+ *
+ * Overdefined is necessary to distinguish because in some programs, like this
+ * simple example, it's perfectly normal and allowed:
+ *
+ * if (...) {
+ *    mov.u32u32 ssa_1(r1.x), ...
+ *    ...
+ * } else {
+ *    mov.u32u32 ssa_2(r1.x), ...
+ *    ...
+ * }
+ * // r1.x is overdefined here!
+ *
+ * However, if an ssa value after the if is accidentally assigned to r1.x, we
+ * need to remember that it's invalid to catch the mistake. Overdef has to be
+ * distinguished from undef so that the state forms a valid lattice to
+ * guarantee that the analysis always terminates. We could avoid relying on
+ * overdef by using liveness analysis, but not relying on liveness has the
+ * benefit that we can catch bugs in liveness analysis too.
+ *
+ * One tricky thing we have to handle is the coalescing of splits/collects,
+ * which means that multiple SSA values can occupy a register at the same
+ * time. While we could use the same merge set indices that RA uses, again
+ * that would rely on the merge set calculation being correct which we don't
+ * want to. Instead we treat splits/collects as transfer instructions, similar
+ * to the parallelcopy instructions inserted by RA, and have them copy their
+ * sources to their destinations. This means that each physreg must carry the
+ * SSA def assigned to it plus an offset into that definition, and when
+ * validating sources we must look through splits/collects to find the
+ * "original" source for each subregister.
+ */
+
+/* Sentinel "defs" for reg_state::def (see the file comment): UNKNOWN is the
+ * lattice bottom (state not yet computed), UNDEF means no definition reaches
+ * here, and OVERDEF is the lattice top (conflicting definitions reach here).
+ */
+#define UNKNOWN ((struct ir3_register *)NULL)
+#define UNDEF ((struct ir3_register *)(uintptr_t)1)
+#define OVERDEF ((struct ir3_register *)(uintptr_t)2)
+
+/* Per-subregister state: the SSA def occupying it (or one of the sentinels
+ * above), plus the offset of this subregister within that def.
+ */
+struct reg_state {
+       struct ir3_register *def;
+       unsigned offset;
+};
+
+/* Reaching state for one register file. */
+struct file_state {
+       struct reg_state regs[RA_MAX_FILE_SIZE];
+};
+
+/* The complete dataflow state at a program point: all three register files. */
+struct reaching_state {
+       struct file_state half, full, shared;
+};
+
+struct ra_val_ctx {
+       /* The instruction currently being validated, for error reporting. */
+       struct ir3_instruction *current_instr;
+
+       /* Scratch state while walking a block, plus the saved per-block entry
+        * states (indexed by block->index) used by the fixpoint iteration.
+        */
+       struct reaching_state reaching;
+       struct reaching_state *block_reaching;
+       unsigned block_count;
+
+       /* Sizes of the full/half register files, passed in from RA. */
+       unsigned full_size, half_size;
+
+       /* When set, half regs live in the full file (see ra_val_get_file()). */
+       bool merged_regs;
+
+       bool failed;
+};
+
+/* Report a failed validate_assert() condition, printing the instruction
+ * being processed for context, and abort immediately.
+ */
+static void
+validate_error(struct ra_val_ctx *ctx, const char *condstr)
+{
+       fprintf(stderr, "ra validation fail: %s\n", condstr);
+       fprintf(stderr, "  -> for instruction: ");
+       ir3_print_instr(ctx->current_instr);
+       abort();
+}
+
+#define validate_assert(ctx, cond) do { \
+       if (!(cond)) { \
+               validate_error(ctx, #cond); \
+       } } while (0)
+
+/* Return the size of the register file that reg is allocated in: shared regs
+ * have their own fixed-size file, and with merged registers half regs share
+ * the full file.
+ */
+static unsigned
+get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
+{
+       if (reg->flags & IR3_REG_SHARED)
+               return RA_SHARED_SIZE;
+       else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
+               return ctx->full_size;
+       else
+               return ctx->half_size;
+}
+
+/* Validate simple things, like the registers being in-bounds. This way we
+ * don't have to worry about out-of-bounds accesses later.
+ */
+
+static void
+validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
+{
+       /* Stash the instruction so validate_assert() can print it on failure. */
+       ctx->current_instr = instr;
+       ra_foreach_dst (dst, instr) {
+               unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
+               validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
+       }
+
+       ra_foreach_src (src, instr) {
+               unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
+               validate_assert(ctx, src_max <= get_file_size(ctx, src));
+       }
+}
+
+/* This is the lattice operator. Merges src into dst and returns whether dst
+ * changed, i.e. whether the fixpoint iteration made progress.
+ */
+static bool
+merge_reg(struct reg_state *dst, const struct reg_state *src)
+{
+       if (dst->def == UNKNOWN) {
+               /* Bottom: take src wholesale. Only progress if src is known. */
+               *dst = *src;
+               return src->def != UNKNOWN;
+       } else if (dst->def == OVERDEF) {
+               /* Already at the lattice top, nothing can change. */
+               return false;
+       } else {
+               if (src->def == UNKNOWN)
+                       return false;
+               else if (src->def == OVERDEF) {
+                       *dst = *src;
+                       return true;
+               } else {
+                       /* Two distinct reaching (def, offset) pairs go to top. */
+                       if (dst->def != src->def || dst->offset != src->offset) {
+                               dst->def = OVERDEF;
+                               dst->offset = 0;
+                               return true;
+                       } else {
+                               return false;
+                       }
+               }
+       }
+}
+
+/* Merge src into dst register-by-register; returns whether dst changed. */
+static bool
+merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
+{
+       bool progress = false;
+       for (unsigned i = 0; i < size; i++)
+               progress |= merge_reg(&dst->regs[i], &src->regs[i]);
+       return progress;
+}
+
+/* Merge all three register files of src into dst; returns whether any of
+ * them changed.
+ */
+static bool
+merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
+                       const struct reaching_state *src)
+{
+       bool progress = false;
+       progress |= merge_file(&dst->full, &src->full, ctx->full_size);
+       progress |= merge_file(&dst->half, &src->half, ctx->half_size);
+       progress |= merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
+       return progress;
+}
+
+/* Return the file in the current reaching state that reg lives in. Mirrors
+ * get_file_size(): with merged registers, half regs use the full file.
+ */
+static struct file_state *
+ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
+{
+       if (reg->flags & IR3_REG_SHARED)
+               return &ctx->reaching.shared;
+       else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
+               return &ctx->reaching.full;
+       else
+               return &ctx->reaching.half;
+}
+
+/* A normal (non-copy) instruction simply defines each of its destinations:
+ * every subregister covered by a dst now holds that def at the corresponding
+ * offset.
+ */
+static void
+propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
+{
+       ra_foreach_dst (dst, instr) {
+               struct file_state *file = ra_val_get_file(ctx, dst);
+               physreg_t physreg = ra_reg_get_physreg(dst);
+               for (unsigned i = 0; i < reg_size(dst); i++) {
+                       file->regs[physreg + i] = (struct reg_state) {
+                               .def = dst,
+                               .offset = i,
+                       };
+               }
+       }
+}
+
+/* A split is treated as a copy (see the file comment): the destination
+ * inherits the reaching state of the subrange of the source that it reads,
+ * rather than becoming a fresh definition.
+ */
+static void
+propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
+{
+       struct ir3_register *dst = split->regs[0];
+       struct ir3_register *src = split->regs[1];
+       physreg_t dst_physreg = ra_reg_get_physreg(dst);
+       physreg_t src_physreg = ra_reg_get_physreg(src);
+       struct file_state *file = ra_val_get_file(ctx, dst);
+
+       /* split.off selects which element of the source this split extracts. */
+       unsigned offset = split->split.off * reg_elem_size(src);
+       for (unsigned i = 0; i < reg_elem_size(src); i++) {
+               file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
+       }
+}
+
+/* A collect copies each source element into the corresponding element of the
+ * destination. The new contents are gathered into a temporary first, so that
+ * sources overlapping the destination are read before being overwritten.
+ */
+static void
+propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
+{
+       struct ir3_register *dst = collect->regs[0];
+       physreg_t dst_physreg = ra_reg_get_physreg(dst);
+       struct file_state *file = ra_val_get_file(ctx, dst);
+
+       unsigned size = reg_size(dst);
+       struct reg_state srcs[size];
+
+       for (unsigned i = 1; i < collect->regs_count; i++) {
+               struct ir3_register *src = collect->regs[i];
+               unsigned dst_offset = (i - 1) * reg_elem_size(dst);
+               for (unsigned j = 0; j < reg_elem_size(dst); j++) {
+                       if (!ra_reg_is_src(src)) {
+                               /* Non-register (immed/const) sources act as if freshly
+                                * defined by the collect itself.
+                                */
+                               srcs[dst_offset + j] = (struct reg_state) {
+                                       .def = dst,
+                                       .offset = dst_offset + j,
+                               };
+                       } else {
+                               physreg_t src_physreg = ra_reg_get_physreg(src);
+                               srcs[dst_offset + j] = file->regs[src_physreg + j];
+                       }
+               }
+       }
+
+       for (unsigned i = 0; i < size; i++)
+               file->regs[dst_physreg + i] = srcs[i];
+}
+
+/* A parallel copy's regs[] array holds all destinations followed by all
+ * sources. Like propagate_collect(), all sources are read into a temporary
+ * before any destination is written, since the copies happen in parallel and
+ * sources may overlap destinations.
+ */
+static void
+propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
+{
+       unsigned size = 0;
+       for (unsigned i = 0; i < pcopy->regs_count / 2; i++) {
+               size += reg_size(pcopy->regs[i]);
+       }
+
+       struct reg_state srcs[size];
+
+       /* Pass 1: gather the reaching state of every source. */
+       unsigned offset = 0;
+       for (unsigned i = 0; i < pcopy->regs_count / 2; i++) {
+               struct ir3_register *dst = pcopy->regs[i];
+               struct ir3_register *src = pcopy->regs[i + pcopy->regs_count / 2];
+               struct file_state *file = ra_val_get_file(ctx, dst);
+
+               for (unsigned j = 0; j < reg_size(dst); j++) {
+                       if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
+                               /* Immed/const sources act as fresh definitions. */
+                               srcs[offset + j] = (struct reg_state) {
+                                       .def = dst,
+                                       .offset = j,
+                               };
+                       } else {
+                               physreg_t src_physreg = ra_reg_get_physreg(src);
+                               srcs[offset + j] = file->regs[src_physreg + j];
+                       }
+               }
+
+               offset += reg_size(dst);
+       }
+       assert(offset == size);
+
+       /* Pass 2: scatter the gathered state to the destinations. */
+       offset = 0;
+       for (unsigned i = 0; i < pcopy->regs_count / 2; i++) {
+               struct ir3_register *dst = pcopy->regs[i];
+               physreg_t dst_physreg = ra_reg_get_physreg(dst);
+               struct file_state *file = ra_val_get_file(ctx, dst);
+
+               for (unsigned j = 0; j < reg_size(dst); j++)
+                       file->regs[dst_physreg + j] = srcs[offset + j];
+
+               offset += reg_size(dst);
+       }
+       assert(offset == size);
+}
+
+/* Update the current reaching state for one instruction, dispatching to the
+ * copy-like handling for split/collect/parallelcopy.
+ */
+static void
+propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
+{
+       if (instr->opc == OPC_META_SPLIT)
+               propagate_split(ctx, instr);
+       else if (instr->opc == OPC_META_COLLECT)
+               propagate_collect(ctx, instr);
+       else if (instr->opc == OPC_META_PARALLEL_COPY)
+               propagate_parallelcopy(ctx, instr);
+       else
+               propagate_normal_instr(ctx, instr);
+}
+
+/* Propagate the reaching state through one block and merge the resulting
+ * end-of-block state into each successor's entry state. Returns whether any
+ * successor's state changed, driving the fixpoint loop in
+ * check_reaching_defs().
+ */
+static bool
+propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
+{
+       ctx->reaching = ctx->block_reaching[block->index];
+
+       foreach_instr (instr, &block->instr_list) {
+               propagate_instr(ctx, instr);
+       }
+
+       bool progress = false;
+       for (unsigned i = 0; i < 2; i++) {
+               if (!block->successors[i])
+                       continue;
+               progress |= merge_state(ctx,
+                                                               &ctx->block_reaching[block->successors[i]->index],
+                                                               &ctx->reaching);
+       }
+       return progress;
+}
+
+/* Starting from (def, offset), look through the copy-like meta instructions
+ * (split/collect/parallelcopy) to find the "original" definition of this
+ * subregister, mirroring how propagate_*() treats those instructions as
+ * copies. Stops at a real instruction, or bails at an immed/const source.
+ */
+static void
+chase_definition(struct reg_state *state)
+{
+       while (true) {
+               struct ir3_instruction *instr = state->def->instr;
+               switch (instr->opc) {
+               case OPC_META_SPLIT: {
+                       /* Step back to the split's source, at the subrange the
+                        * split extracts.
+                        */
+                       struct ir3_register *new_def = instr->regs[1]->def;
+                       unsigned offset = instr->split.off * reg_elem_size(new_def);
+                       *state = (struct reg_state) {
+                               .def = new_def,
+                               .offset = state->offset + offset,
+                       };
+                       break;
+               }
+               case OPC_META_COLLECT: {
+                       /* Map the offset within the collect's dst back to the
+                        * source element it came from.
+                        */
+                       unsigned src_idx = state->offset / reg_elem_size(state->def);
+                       unsigned src_offset = state->offset % reg_elem_size(state->def);
+                       struct ir3_register *new_def = instr->regs[src_idx + 1]->def;
+                       if (new_def) {
+                               *state = (struct reg_state) {
+                                       .def = new_def,
+                                       .offset = src_offset,
+                               };
+                       } else {
+                               /* Bail on immed/const */
+                               return;
+                       }
+                       break;
+               }
+               case OPC_META_PARALLEL_COPY: {
+                       /* Find which destination of the pcopy our def is. */
+                       unsigned dst_idx = ~0;
+                       for (unsigned i = 0; i < instr->regs_count; i++) {
+                               if (instr->regs[i] == state->def) {
+                                       dst_idx = i;
+                                       break;
+                               }
+                       }
+                       assert(dst_idx != ~0);
+
+                       /* regs[] holds all dsts then all srcs, so the matching
+                        * source is regs_count / 2 entries further on.
+                        */
+                       struct ir3_register *new_def =
+                               instr->regs[dst_idx + instr->regs_count / 2]->def;
+                       if (new_def) {
+                               state->def = new_def;
+                       } else {
+                               /* Bail on immed/const */
+                               return;
+                       }
+                       break;
+               }
+               default:
+                       return;
+               }
+       }
+}
+
+/* Print a human-readable description of a reg_state for error messages. */
+static void
+dump_reg_state(struct reg_state *state)
+{
+       if (state->def == UNDEF) {
+               fprintf(stderr, "no reaching definition");
+       } else if (state->def == OVERDEF) {
+               fprintf(stderr, "more than one reaching definition or partial definition");
+       } else {
+               /* The analysis should always remove UNKNOWN eventually. */
+               assert(state->def != UNKNOWN);
+
+               fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u",
+                               state->def->instr->serialno, state->def->name,
+                               (state->def->flags & IR3_REG_HALF) ? "h" : "",
+                               state->def->num / 4, "xyzw"[state->def->num % 4],
+                               state->offset);
+       }
+}
+
+/* Check that, for each subregister of src, the definition that actually
+ * reaches its assigned physreg matches the definition the source refers to
+ * (chased through copy-like meta instructions). On mismatch, print a
+ * diagnostic and record the failure; the abort happens later in
+ * check_reaching_defs() so the whole shader can be checked and dumped first.
+ */
+static void
+check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
+                                  struct ir3_register *src)
+{
+       struct file_state *file = ra_val_get_file(ctx, src);
+       physreg_t physreg = ra_reg_get_physreg(src);
+       for (unsigned i = 0; i < reg_size(src); i++) {
+               struct reg_state expected = (struct reg_state) {
+                       .def = src->def,
+                       .offset = i,
+               };
+               chase_definition(&expected);
+
+               struct reg_state actual = file->regs[physreg + i];
+
+               if (expected.def != actual.def ||
+                       expected.offset != actual.offset) {
+                       fprintf(stderr, "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
+                                       src->def->instr->serialno, src->def->name, i);
+                       fprintf(stderr, "expected: ");
+                       dump_reg_state(&expected);
+                       fprintf(stderr, "\n");
+                       fprintf(stderr, "actual: ");
+                       dump_reg_state(&actual);
+                       fprintf(stderr, "\n");
+                       fprintf(stderr, "-> for instruction: ");
+                       ir3_print_instr(instr);
+                       ctx->failed = true;
+               }
+       }
+}
+
+/* Check all register sources of one instruction. Copy-like meta instructions
+ * are skipped: they are handled as transfers by propagate_instr(), and phi
+ * sources are checked against the end of each predecessor instead (see
+ * check_reaching_block()).
+ */
+static void
+check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
+{
+       if (instr->opc == OPC_META_SPLIT ||
+               instr->opc == OPC_META_COLLECT ||
+               instr->opc == OPC_META_PARALLEL_COPY ||
+               instr->opc == OPC_META_PHI) {
+               return;
+       }
+
+       ra_foreach_src (src, instr) {
+               check_reaching_src(ctx, instr, src);
+       }
+}
+
+/* Re-walk one block using the converged entry state, checking each
+ * instruction's sources as we go. Afterwards, check each successor's phi
+ * sources that correspond to this predecessor against our end-of-block
+ * state, since that is where those values must be live.
+ */
+static void
+check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
+{
+       ctx->reaching = ctx->block_reaching[block->index];
+
+       foreach_instr (instr, &block->instr_list) {
+               check_reaching_instr(ctx, instr);
+               propagate_instr(ctx, instr);
+       }
+
+       for (unsigned i = 0; i < 2; i++) {
+               struct ir3_block *succ = block->successors[i];
+               if (!succ)
+                       continue;
+
+               unsigned pred_idx = ir3_block_get_pred_index(succ, block);
+               foreach_instr (instr, &succ->instr_list) {
+                       /* Phis are grouped at the top of the block. */
+                       if (instr->opc != OPC_META_PHI)
+                               break;
+                       /* A NULL def means an undef phi source; nothing to check. */
+                       if (instr->regs[1 + pred_idx]->def)
+                               check_reaching_src(ctx, instr, instr->regs[1 + pred_idx]);
+               }
+       }
+}
+
+/* The main reaching-definitions check: initialize the entry state, iterate
+ * the forward dataflow analysis to a fixpoint, then verify every source.
+ * Aborts (after dumping the shader) if any check failed.
+ */
+static void
+check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
+{
+       /* rzalloc zeroes the array, so every block starts at UNKNOWN (NULL). */
+       ctx->block_reaching =
+               rzalloc_array(ctx, struct reaching_state, ctx->block_count);
+
+       /* The start block's registers begin as UNDEF: never written. */
+       struct reaching_state *start = &ctx->block_reaching[0];
+       for (unsigned i = 0; i < ctx->full_size; i++)
+               start->full.regs[i].def = UNDEF;
+       for (unsigned i = 0; i < ctx->half_size; i++)
+               start->half.regs[i].def = UNDEF;
+       for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
+               start->shared.regs[i].def = UNDEF;
+
+       /* Iterate until no block's entry state changes (guaranteed to
+        * terminate because merge_reg() only moves up the lattice).
+        */
+       bool progress;
+       do {
+               progress = false;
+               foreach_block (block, &ir->block_list) {
+                       progress |= propagate_block(ctx, block);
+               }
+       } while (progress);
+       
+       foreach_block (block, &ir->block_list) {
+               check_reaching_block(ctx, block);
+       }
+
+       if (ctx->failed) {
+               fprintf(stderr, "failing shader:\n");
+               ir3_print(ir);
+               abort();
+       }
+}
+
+/* Entry point: validate the register assignment RA just produced for v.
+ * full_size/half_size are the file sizes RA used and block_count the number
+ * of blocks from liveness. Compiles to an early return in release (NDEBUG)
+ * builds.
+ */
+void
+ir3_ra_validate(struct ir3_shader_variant *v,
+                           unsigned full_size, unsigned half_size, unsigned block_count)
+{
+#ifdef NDEBUG
+#  define VALIDATE 0
+#else
+#  define VALIDATE 1
+#endif
+
+       if (!VALIDATE)
+               return;
+
+       struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
+       ctx->merged_regs = v->mergedregs;
+       ctx->full_size = full_size;
+       ctx->half_size = half_size;
+       ctx->block_count = block_count;
+
+       /* First pass: bounds-check everything so the dataflow analysis can
+        * index register files without worrying about overflow.
+        */
+       foreach_block (block, &v->ir->block_list) {
+               foreach_instr (instr, &block->instr_list) {
+                       validate_simple(ctx, instr);
+               }
+       }
+
+       check_reaching_defs(ctx, v->ir);
+
+       ralloc_free(ctx);
+}
+
index c27b267..28c8e88 100644 (file)
@@ -102,6 +102,7 @@ libfreedreno_ir3_files = files(
   'ir3_print.c',
   'ir3_ra.c',
   'ir3_ra.h',
+  'ir3_ra_validate.c',
   'ir3_sched.c',
   'ir3_shader.c',
   'ir3_shader.h',