From: Connor Abbott
Date: Wed, 16 Jun 2021 13:55:30 +0000 (+0200)
Subject: ir3/ra: Fix array parallelcopy confusion
X-Git-Tag: upstream/21.2.3~1760
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e19f1124353bf0c76072d8687bb93b78fd2970cf;p=platform%2Fupstream%2Fmesa.git

ir3/ra: Fix array parallelcopy confusion

With array registers, there are two nums we care about:

1. The base num that the whole array starts at (->array.base)
2. The num that the instruction uses, plus possibly an indirect offset (->num or ->array.offset)

For parallel copies we always copy the whole array, so (2) is irrelevant here. For phis and parallel copies inserted for phis, we used assign_reg() which assigned ->array.base, but we forgot about this when constructing our own parallel copies for live range splitting, just setting ->num instead. The parallel copy lowering was also inconsistent here, using ra_reg_get_num() (which looks at ->array.base for arrays) for sources but looking at ->num directly for destinations. This makes everything use ->array.base consistently.

While we're here, make sure to remove IR3_REG_SSA from liveout copies to make sure printing works correctly.
Fixes: 0ffcb19 ("ir3: Rewrite register allocation") Part-of: --- diff --git a/src/freedreno/ir3/ir3_lower_parallelcopy.c b/src/freedreno/ir3/ir3_lower_parallelcopy.c index 68f1fef..0fa5e76 100644 --- a/src/freedreno/ir3/ir3_lower_parallelcopy.c +++ b/src/freedreno/ir3/ir3_lower_parallelcopy.c @@ -466,9 +466,10 @@ ir3_lower_copies(struct ir3_shader_variant *v) struct ir3_register *dst = instr->regs[i]; struct ir3_register *src = instr->regs[i + instr->regs_count / 2]; unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED); + unsigned dst_physreg = ra_reg_get_physreg(dst); for (unsigned j = 0; j < reg_elems(dst); j++) { array_insert(NULL, copies, (struct copy_entry) { - .dst = ra_num_to_physreg(dst->num + j, flags), + .dst = dst_physreg + j * reg_elem_size(dst), .src = get_copy_src(src, j * reg_elem_size(dst)), .flags = flags, }); diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 34f828f..afbb817 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -1177,20 +1177,21 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr) for (unsigned i = 0; i < ctx->parallel_copies_count; i++) { struct ra_parallel_copy *entry = &ctx->parallel_copies[i]; struct ir3_register *reg = - ir3_reg_create(pcopy, ra_interval_get_num(entry->interval), + ir3_reg_create(pcopy, INVALID_REG, entry->interval->interval.reg->flags & ~IR3_REG_SSA); reg->size = entry->interval->interval.reg->size; reg->wrmask = entry->interval->interval.reg->wrmask; + assign_reg(pcopy, reg, ra_interval_get_num(entry->interval)); } for (unsigned i = 0; i < ctx->parallel_copies_count; i++) { struct ra_parallel_copy *entry = &ctx->parallel_copies[i]; struct ir3_register *reg = - ir3_reg_create(pcopy, - ra_physreg_to_num(entry->src, entry->interval->interval.reg->flags), + ir3_reg_create(pcopy, INVALID_REG, entry->interval->interval.reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA)); reg->size = entry->interval->interval.reg->size; reg->wrmask = 
entry->interval->interval.reg->wrmask; + assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags)); } list_del(&pcopy->node); @@ -1620,23 +1621,26 @@ insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src, 2 + old_pcopy_regs); for (unsigned i = 0; i < old_pcopy_regs / 2; i++) { + old_pcopy->regs[i]->instr = pcopy; pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i]; } struct ir3_register *dst_reg = - ir3_reg_create(pcopy, ra_physreg_to_num(dst, reg->flags), reg->flags); + ir3_reg_create(pcopy, INVALID_REG, + reg->flags & ~IR3_REG_SSA); dst_reg->wrmask = reg->wrmask; dst_reg->size = reg->size; + assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags)); for (unsigned i = old_pcopy_regs / 2; i < old_pcopy_regs; i++) { pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i]; } struct ir3_register *src_reg = - ir3_reg_create(pcopy, ra_physreg_to_num(src, reg->flags), - reg->flags & ~IR3_REG_DEST); + ir3_reg_create(pcopy, INVALID_REG, reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA)); src_reg->wrmask = reg->wrmask; src_reg->size = reg->size; + assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags)); if (old_pcopy) list_del(&old_pcopy->node);