From 94cb129d514b748db1342c6208ae4b7390bd33da Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Fri, 17 Apr 2020 19:31:56 +0200 Subject: [PATCH] ir3/ra: Fix off-by-one issues with live-range extension The intersects() function assumes that inside each instruction values always die before they are defined, so that if the end of one range is the same instruction as the beginning of the next then they don't intersect. However, this isn't the case for values that become live at the beginning of a basic block, which become live *before* the first instruction, or instructions that die at the end of a basic block which die after the last instruction. For example, imagine that we have two values, A which is defined earlier in the block and B which is defined in the last instruction of the block and both die at the end of the basic block (e.g. are used in the next iteration of a loop). We would compute a range for A of, say, (10, 20) and for B of (20, 20) since each block's end_ip is the same as the ip of the last instruction, and RA would consider them to not interfere. There's a similar problem with values that become live at the beginning. The fix is to offset the block's start_ip and end_ip by one so that they don't correspond to any actual instruction. One way to think about this is that we're adding fake instructions at the beginning and end of a block where values become live & die. We could invert the order, so that values consumed by each instruction are considered dead at the end of the previous instruction, but then values that become dead at the beginning of the basic block would incorrectly have an empty live range, with a similar problem at the end of the basic block if we try to say that values are defined at the beginning of the next instruction. So the extra padding instructions are unavoidable. This fixes an accidental infinite loop in the shader for dEQP-VK.spirv_assembly.type.scalar.u32.switch_vert. Part-of: --- src/freedreno/ir3/ir3.c | 24 +++++++++++++++++++++++- src/freedreno/ir3/ir3.h | 1 + src/freedreno/ir3/ir3_ra.c | 11 ++++++++++- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 9678389..d4fcd99 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1155,11 +1155,33 @@ ir3_count_instructions(struct ir3 *ir) unsigned cnt = 1; foreach_block (block, &ir->block_list) { block->start_ip = cnt; + foreach_instr (instr, &block->instr_list) { + instr->ip = cnt++; + } block->end_ip = cnt; + } + return cnt; +} + +/* When counting instructions for RA, we insert extra fake instructions at the + * beginning of each block, where values become live, and at the end where + * values die. This prevents problems where values live-in at the beginning or + * live-out at the end of a block from being treated as if they were + * live-in/live-out at the first/last instruction, which would be incorrect. + * In ir3_legalize these ip's are assumed to be actual ip's of the final + * program, so it would be incorrect to use this everywhere. + */ + +unsigned +ir3_count_instructions_ra(struct ir3 *ir) +{ + unsigned cnt = 1; + foreach_block (block, &ir->block_list) { + block->start_ip = cnt++; foreach_instr (instr, &block->instr_list) { instr->ip = cnt++; - block->end_ip = instr->ip; } + block->end_ip = cnt++; } return cnt; } diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 351490a..6cb8820 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -594,6 +594,7 @@ void ir3_block_clear_mark(struct ir3_block *block); void ir3_clear_mark(struct ir3 *shader); unsigned ir3_count_instructions(struct ir3 *ir); +unsigned ir3_count_instructions_ra(struct ir3 *ir); void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps); diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 72682dc..f741744 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -541,7 +541,7 @@ ra_init(struct ir3_ra_ctx *ctx) unsigned n, base; ir3_clear_mark(ctx->ir); - n = ir3_count_instructions(ctx->ir); + n = ir3_count_instructions_ra(ctx->ir); ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n); @@ -950,6 +950,15 @@ ra_calc_block_live_values(struct ir3_ra_ctx *ctx, struct ir3_block *block) /* the remaining live should match liveout (for extra sanity testing): */ if (RA_DEBUG) { + unsigned new_dead = 0; + BITSET_FOREACH_SET (name, live, ctx->alloc_count) { + /* Is this the last use? */ + if (ctx->use[name] != block->end_ip) + continue; + new_dead += name_size(ctx, name); + d("NEW_DEAD: %u (new_dead=%u)", name, new_dead); + BITSET_CLEAR(live, name); + } unsigned liveout = 0; BITSET_FOREACH_SET (name, bd->liveout, ctx->alloc_count) { liveout += name_size(ctx, name); -- 2.7.4