From: Connor Abbott Date: Mon, 26 Apr 2021 21:05:53 +0000 (+0200) Subject: ir3/ra: Add a validation pass X-Git-Tag: upstream/21.2.3~2094 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2f51379d036cefa1a553d087e1b61d2a642c3394;p=platform%2Fupstream%2Fmesa.git ir3/ra: Add a validation pass This helps catch tricky-to-debug bugs in RA, or helps rule them out. Part-of: --- diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index a8a523f..34f828f 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -1936,6 +1936,8 @@ ir3_ra(struct ir3_shader_variant *v) handle_block(ctx, ir3_start_block(v->ir)); + ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count); + /* Strip array-ness and SSA-ness at the end, because various helpers still * need to work even on definitions that have already been assigned. For * example, we need to preserve array-ness so that array live-ins have the diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index 672b536..7e5b063 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -187,6 +187,9 @@ void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live, struct ir3_pressure *max_pressure); +void ir3_ra_validate(struct ir3_shader_variant *v, + unsigned full_size, unsigned half_size, unsigned block_count); + void ir3_lower_copies(struct ir3_shader_variant *v); /* Register interval datastructure diff --git a/src/freedreno/ir3/ir3_ra_validate.c b/src/freedreno/ir3/ir3_ra_validate.c new file mode 100644 index 0000000..b9d3e81 --- /dev/null +++ b/src/freedreno/ir3/ir3_ra_validate.c @@ -0,0 +1,556 @@ +/* + * Copyright (C) 2021 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "util/ralloc.h" +#include "ir3_ra.h" +#include "ir3_shader.h" + +/* This file implements a validation pass for register allocation. We check + * that the assignment of SSA values to registers is "valid", in the sense + * that each original definition reaches all of its uses without being + * clobbered by something else. + * + * The validation is a forward dataflow analysis. The state at each point + * consists of, for each physical register, the SSA value occupying it, or a + * few special values: + * + * - "unknown" is set initially, before the dataflow analysis assigns it a + * value. This is the lattice bottom. + * - Values at the start get "undef", which acts like a special SSA value that + * indicates it is never written. + * - "overdefined" registers are set to more than one value, depending on + * which path you take to get to the spot. This is the lattice top. + * + * Overdefined is necessary to distinguish because in some programs, like this + * simple example, it's perfectly normal and allowed: + * + * if (...) { + * mov.u32u32 ssa_1(r1.x), ... + * ... + * } else { + * mov.u32u32 ssa_2(r1.x), ... + * ... + * } + * // r1.x is overdefined here! + * + * However, if an ssa value after the if is accidentally assigned to r1.x, we + * need to remember that it's invalid to catch the mistake. Overdef has to be + * distinguished from undef so that the state forms a valid lattice to + * guarantee that the analysis always terminates. We could avoid relying on + * overdef by using liveness analysis, but not relying on liveness has the + * benefit that we can catch bugs in liveness analysis too. + * + * One tricky thing we have to handle is the coalescing of splits/collects, + * which means that multiple SSA values can occupy a register at the same + * time. While we could use the same merge set indices that RA uses, again + * that would rely on the merge set calculation being correct which we don't + * want to. Instead we treat splits/collects as transfer instructions, similar + * to the parallelcopy instructions inserted by RA, and have them copy their + * sources to their destinations. This means that each physreg must carry the + * SSA def assigned to it plus an offset into that definition, and when + * validating sources we must look through splits/collects to find the + * "original" source for each subregister. + */ + +#define UNKNOWN ((struct ir3_register *)NULL) +#define UNDEF ((struct ir3_register *)(uintptr_t)1) +#define OVERDEF ((struct ir3_register *)(uintptr_t)2) + +struct reg_state { + struct ir3_register *def; + unsigned offset; +}; + +struct file_state { + struct reg_state regs[RA_MAX_FILE_SIZE]; +}; + +struct reaching_state { + struct file_state half, full, shared; +}; + +struct ra_val_ctx { + struct ir3_instruction *current_instr; + + struct reaching_state reaching; + struct reaching_state *block_reaching; + unsigned block_count; + + unsigned full_size, half_size; + + bool merged_regs; + + bool failed; +}; + +static void +validate_error(struct ra_val_ctx *ctx, const char *condstr) +{ + fprintf(stderr, "ra validation fail: %s\n", condstr); + fprintf(stderr, " -> for instruction: "); + ir3_print_instr(ctx->current_instr); + abort(); +} + +#define validate_assert(ctx, cond) do { \ + if (!(cond)) { \ + validate_error(ctx, #cond); \ + } } while (0) + +static unsigned +get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg) +{ + if (reg->flags & IR3_REG_SHARED) + return RA_SHARED_SIZE; + else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) + return ctx->full_size; + else + return ctx->half_size; +} + +/* Validate simple things, like the registers being in-bounds. This way we + * don't have to worry about out-of-bounds accesses later. + */ + +static void +validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr) +{ + ctx->current_instr = instr; + ra_foreach_dst (dst, instr) { + unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst); + validate_assert(ctx, dst_max <= get_file_size(ctx, dst)); + } + + ra_foreach_src (src, instr) { + unsigned src_max = ra_reg_get_physreg(src) + reg_size(src); + validate_assert(ctx, src_max <= get_file_size(ctx, src)); + } +} + +/* This is the lattice operator. */ +static bool +merge_reg(struct reg_state *dst, const struct reg_state *src) +{ + if (dst->def == UNKNOWN) { + *dst = *src; + return src->def != UNKNOWN; + } else if (dst->def == OVERDEF) { + return false; + } else { + if (src->def == UNKNOWN) + return false; + else if (src->def == OVERDEF) { + *dst = *src; + return true; + } else { + if (dst->def != src->def || dst->offset != src->offset) { + dst->def = OVERDEF; + dst->offset = 0; + return true; + } else { + return false; + } + } + } +} + +static bool +merge_file(struct file_state *dst, const struct file_state *src, unsigned size) +{ + bool progress = false; + for (unsigned i = 0; i < size; i++) + progress |= merge_reg(&dst->regs[i], &src->regs[i]); + return progress; +} + +static bool +merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst, + const struct reaching_state *src) +{ + bool progress = false; + progress |= merge_file(&dst->full, &src->full, ctx->full_size); + progress |= merge_file(&dst->half, &src->half, ctx->half_size); + progress |= merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE); + return progress; +} + +static struct file_state * +ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg) +{ + if (reg->flags & IR3_REG_SHARED) + return &ctx->reaching.shared; + else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) + return &ctx->reaching.full; + else + return &ctx->reaching.half; +} + +static void +propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) +{ + ra_foreach_dst (dst, instr) { + struct file_state *file = ra_val_get_file(ctx, dst); + physreg_t physreg = ra_reg_get_physreg(dst); + for (unsigned i = 0; i < reg_size(dst); i++) { + file->regs[physreg + i] = (struct reg_state) { + .def = dst, + .offset = i, + }; + } + } +} + +static void +propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split) +{ + struct ir3_register *dst = split->regs[0]; + struct ir3_register *src = split->regs[1]; + physreg_t dst_physreg = ra_reg_get_physreg(dst); + physreg_t src_physreg = ra_reg_get_physreg(src); + struct file_state *file = ra_val_get_file(ctx, dst); + + unsigned offset = split->split.off * reg_elem_size(src); + for (unsigned i = 0; i < reg_elem_size(src); i++) { + file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i]; + } +} + +static void +propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect) +{ + struct ir3_register *dst = collect->regs[0]; + physreg_t dst_physreg = ra_reg_get_physreg(dst); + struct file_state *file = ra_val_get_file(ctx, dst); + + unsigned size = reg_size(dst); + struct reg_state srcs[size]; + + for (unsigned i = 1; i < collect->regs_count; i++) { + struct ir3_register *src = collect->regs[i]; + unsigned dst_offset = (i - 1) * reg_elem_size(dst); + for (unsigned j = 0; j < reg_elem_size(dst); j++) { + if (!ra_reg_is_src(src)) { + srcs[dst_offset + j] = (struct reg_state) { + .def = dst, + .offset = dst_offset + j, + }; + } else { + physreg_t src_physreg = ra_reg_get_physreg(src); + srcs[dst_offset + j] = file->regs[src_physreg + j]; + } + } + } + + for (unsigned i = 0; i < size; i++) + file->regs[dst_physreg + i] = srcs[i]; +} + +static void +propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy) +{ + unsigned size = 0; + for (unsigned i = 0; i < pcopy->regs_count / 2; i++) { + size += reg_size(pcopy->regs[i]); + } + + struct reg_state srcs[size]; + + unsigned offset = 0; + for (unsigned i = 0; i < pcopy->regs_count / 2; i++) { + struct ir3_register *dst = pcopy->regs[i]; + struct ir3_register *src = pcopy->regs[i + pcopy->regs_count / 2]; + struct file_state *file = ra_val_get_file(ctx, dst); + + for (unsigned j = 0; j < reg_size(dst); j++) { + if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) { + srcs[offset + j] = (struct reg_state) { + .def = dst, + .offset = j, + }; + } else { + physreg_t src_physreg = ra_reg_get_physreg(src); + srcs[offset + j] = file->regs[src_physreg + j]; + } + } + + offset += reg_size(dst); + } + assert(offset == size); + + offset = 0; + for (unsigned i = 0; i < pcopy->regs_count / 2; i++) { + struct ir3_register *dst = pcopy->regs[i]; + physreg_t dst_physreg = ra_reg_get_physreg(dst); + struct file_state *file = ra_val_get_file(ctx, dst); + + for (unsigned j = 0; j < reg_size(dst); j++) + file->regs[dst_physreg + j] = srcs[offset + j]; + + offset += reg_size(dst); + } + assert(offset == size); +} + +static void +propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) +{ + if (instr->opc == OPC_META_SPLIT) + propagate_split(ctx, instr); + else if (instr->opc == OPC_META_COLLECT) + propagate_collect(ctx, instr); + else if (instr->opc == OPC_META_PARALLEL_COPY) + propagate_parallelcopy(ctx, instr); + else + propagate_normal_instr(ctx, instr); +} + +static bool +propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block) +{ + ctx->reaching = ctx->block_reaching[block->index]; + + foreach_instr (instr, &block->instr_list) { + propagate_instr(ctx, instr); + } + + bool progress = false; + for (unsigned i = 0; i < 2; i++) { + if (!block->successors[i]) + continue; + progress |= merge_state(ctx, + &ctx->block_reaching[block->successors[i]->index], + &ctx->reaching); + } + return progress; +} + +static void +chase_definition(struct reg_state *state) +{ + while (true) { + struct ir3_instruction *instr = state->def->instr; + switch (instr->opc) { + case OPC_META_SPLIT: { + struct ir3_register *new_def = instr->regs[1]->def; + unsigned offset = instr->split.off * reg_elem_size(new_def); + *state = (struct reg_state) { + .def = new_def, + .offset = state->offset + offset, + }; + break; + } + case OPC_META_COLLECT: { + unsigned src_idx = state->offset / reg_elem_size(state->def); + unsigned src_offset = state->offset % reg_elem_size(state->def); + struct ir3_register *new_def = instr->regs[src_idx + 1]->def; + if (new_def) { + *state = (struct reg_state) { + .def = new_def, + .offset = src_offset, + }; + } else { + /* Bail on immed/const */ + return; + } + break; + } + case OPC_META_PARALLEL_COPY: { + unsigned dst_idx = ~0; + for (unsigned i = 0; i < instr->regs_count; i++) { + if (instr->regs[i] == state->def) { + dst_idx = i; + break; + } + } + assert(dst_idx != ~0); + + struct ir3_register *new_def = + instr->regs[dst_idx + instr->regs_count / 2]->def; + if (new_def) { + state->def = new_def; + } else { + /* Bail on immed/const */ + return; + } + break; + } + default: + return; + } + } +} + +static void +dump_reg_state(struct reg_state *state) +{ + if (state->def == UNDEF) { + fprintf(stderr, "no reaching definition"); + } else if (state->def == OVERDEF) { + fprintf(stderr, "more than one reaching definition or partial definition"); + } else { + /* The analysis should always remove UNKNOWN eventually. */ + assert(state->def != UNKNOWN); + + fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", + state->def->instr->serialno, state->def->name, + (state->def->flags & IR3_REG_HALF) ? "h" : "", + state->def->num / 4, "xyzw"[state->def->num % 4], + state->offset); + } +} + +static void +check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *src) +{ + struct file_state *file = ra_val_get_file(ctx, src); + physreg_t physreg = ra_reg_get_physreg(src); + for (unsigned i = 0; i < reg_size(src); i++) { + struct reg_state expected = (struct reg_state) { + .def = src->def, + .offset = i, + }; + chase_definition(&expected); + + struct reg_state actual = file->regs[physreg + i]; + + if (expected.def != actual.def || + expected.offset != actual.offset) { + fprintf(stderr, "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n", + src->def->instr->serialno, src->def->name, i); + fprintf(stderr, "expected: "); + dump_reg_state(&expected); + fprintf(stderr, "\n"); + fprintf(stderr, "actual: "); + dump_reg_state(&actual); + fprintf(stderr, "\n"); + fprintf(stderr, "-> for instruction: "); + ir3_print_instr(instr); + ctx->failed = true; + } + } +} + +static void +check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) +{ + if (instr->opc == OPC_META_SPLIT || + instr->opc == OPC_META_COLLECT || + instr->opc == OPC_META_PARALLEL_COPY || + instr->opc == OPC_META_PHI) { + return; + } + + ra_foreach_src (src, instr) { + check_reaching_src(ctx, instr, src); + } +} + +static void +check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block) +{ + ctx->reaching = ctx->block_reaching[block->index]; + + foreach_instr (instr, &block->instr_list) { + check_reaching_instr(ctx, instr); + propagate_instr(ctx, instr); + } + + for (unsigned i = 0; i < 2; i++) { + struct ir3_block *succ = block->successors[i]; + if (!succ) + continue; + + unsigned pred_idx = ir3_block_get_pred_index(succ, block); + foreach_instr (instr, &succ->instr_list) { + if (instr->opc != OPC_META_PHI) + break; + if (instr->regs[1 + pred_idx]->def) + check_reaching_src(ctx, instr, instr->regs[1 + pred_idx]); + } + } +} + +static void +check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir) +{ + ctx->block_reaching = + rzalloc_array(ctx, struct reaching_state, ctx->block_count); + + struct reaching_state *start = &ctx->block_reaching[0]; + for (unsigned i = 0; i < ctx->full_size; i++) + start->full.regs[i].def = UNDEF; + for (unsigned i = 0; i < ctx->half_size; i++) + start->half.regs[i].def = UNDEF; + for (unsigned i = 0; i < RA_SHARED_SIZE; i++) + start->shared.regs[i].def = UNDEF; + + bool progress; + do { + progress = false; + foreach_block (block, &ir->block_list) { + progress |= propagate_block(ctx, block); + } + } while (progress); + + foreach_block (block, &ir->block_list) { + check_reaching_block(ctx, block); + } + + if (ctx->failed) { + fprintf(stderr, "failing shader:\n"); + ir3_print(ir); + abort(); + } +} + +void +ir3_ra_validate(struct ir3_shader_variant *v, + unsigned full_size, unsigned half_size, unsigned block_count) +{ +#ifdef NDEBUG +# define VALIDATE 0 +#else +# define VALIDATE 1 +#endif + + if (!VALIDATE) + return; + + struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx); + ctx->merged_regs = v->mergedregs; + ctx->full_size = full_size; + ctx->half_size = half_size; + ctx->block_count = block_count; + + foreach_block (block, &v->ir->block_list) { + foreach_instr (instr, &block->instr_list) { + validate_simple(ctx, instr); + } + } + + check_reaching_defs(ctx, v->ir); + + ralloc_free(ctx); +} + diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index c27b267..28c8e88 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -102,6 +102,7 @@ libfreedreno_ir3_files = files( 'ir3_print.c', 'ir3_ra.c', 'ir3_ra.h', + 'ir3_ra_validate.c', 'ir3_sched.c', 'ir3_shader.c', 'ir3_shader.h',