ir3/ra: Fix array parallelcopy confusion
author Connor Abbott <cwabbott0@gmail.com>
Wed, 16 Jun 2021 13:55:30 +0000 (15:55 +0200)
committer Marge Bot <eric+marge@anholt.net>
Wed, 16 Jun 2021 22:45:13 +0000 (22:45 +0000)
With array registers, there are two nums we care about:

1. The base num that the whole array starts at (->array.base)
2. The num that the instruction uses, plus possibly an indirect offset
   (->num or ->array.offset)

For parallel copies we always copy the whole array, so (2) is irrelevant
here. For phis and parallel copies inserted for phis, we used
assign_reg() which assigned ->array.base, but we forgot about this when
constructing our own parallel copies for live range splitting, just
setting ->num instead. The parallel copy lowering was also inconsistent
here, using ra_reg_get_num() (which looks at ->array.base for arrays)
for sources but looking at ->num directly for destinations. This makes
everything use ->array.base consistently.

While we're here, remove IR3_REG_SSA from liveout copies so that
printing works correctly.

Fixes: 0ffcb19 ("ir3: Rewrite register allocation")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11422>

src/freedreno/ir3/ir3_lower_parallelcopy.c
src/freedreno/ir3/ir3_ra.c

index 68f1fef..0fa5e76 100644 (file)
@@ -466,9 +466,10 @@ ir3_lower_copies(struct ir3_shader_variant *v)
                                        struct ir3_register *dst = instr->regs[i];
                                        struct ir3_register *src = instr->regs[i + instr->regs_count / 2];
                                        unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
+                                       unsigned dst_physreg = ra_reg_get_physreg(dst);
                                        for (unsigned j = 0; j < reg_elems(dst); j++) {
                                                array_insert(NULL, copies, (struct copy_entry) {
-                                                       .dst = ra_num_to_physreg(dst->num + j, flags),
+                                                       .dst = dst_physreg + j * reg_elem_size(dst),
                                                        .src = get_copy_src(src, j * reg_elem_size(dst)),
                                                        .flags = flags,
                                                });
index 34f828f..afbb817 100644 (file)
@@ -1177,20 +1177,21 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
        for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
                struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
                struct ir3_register *reg =
-                       ir3_reg_create(pcopy, ra_interval_get_num(entry->interval),
+                       ir3_reg_create(pcopy, INVALID_REG,
                                                   entry->interval->interval.reg->flags & ~IR3_REG_SSA);
                reg->size = entry->interval->interval.reg->size;
                reg->wrmask = entry->interval->interval.reg->wrmask;
+               assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
        }
 
        for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
                struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
                struct ir3_register *reg =
-                       ir3_reg_create(pcopy,
-                                                  ra_physreg_to_num(entry->src, entry->interval->interval.reg->flags),
+                       ir3_reg_create(pcopy, INVALID_REG,
                                                   entry->interval->interval.reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA));
                reg->size = entry->interval->interval.reg->size;
                reg->wrmask = entry->interval->interval.reg->wrmask;
+               assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
        }
 
        list_del(&pcopy->node);
@@ -1620,23 +1621,26 @@ insert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
                                                 2 + old_pcopy_regs);
 
        for (unsigned i = 0; i < old_pcopy_regs / 2; i++) {
+               old_pcopy->regs[i]->instr = pcopy;
                pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i];
        }
 
        struct ir3_register *dst_reg =
-               ir3_reg_create(pcopy, ra_physreg_to_num(dst, reg->flags), reg->flags);
+               ir3_reg_create(pcopy, INVALID_REG,
+                                          reg->flags & ~IR3_REG_SSA);
        dst_reg->wrmask = reg->wrmask;
        dst_reg->size = reg->size;
+       assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
 
        for (unsigned i = old_pcopy_regs / 2; i < old_pcopy_regs; i++) {
                pcopy->regs[pcopy->regs_count++] = old_pcopy->regs[i];
        }
 
        struct ir3_register *src_reg =
-               ir3_reg_create(pcopy, ra_physreg_to_num(src, reg->flags),
-                                          reg->flags & ~IR3_REG_DEST);
+               ir3_reg_create(pcopy, INVALID_REG, reg->flags & ~(IR3_REG_DEST | IR3_REG_SSA));
        src_reg->wrmask = reg->wrmask;
        src_reg->size = reg->size;
+       assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
 
        if (old_pcopy)
                list_del(&old_pcopy->node);