ir3: Prepare for instructions with multiple destinations
author Connor Abbott <cwabbott0@gmail.com>
Thu, 18 Feb 2021 16:57:49 +0000 (17:57 +0100)
committer Emma Anholt <emma@anholt.net>
Thu, 10 Jun 2021 19:20:38 +0000 (12:20 -0700)
To simplify the pre-RA merge set code and express the result of live-range
splitting in RA, we need to add support for parallel copy instructions,
and for the merge set code these parallel copies need to be in SSA form.
Parallel copies have multiple destinations by necessity, but there was
no way to express this in the existing IR. In particular there was no
support for marking a register as being a destination, and no support
for indicating which destination register out of several an SSA source
refers to. This replaces ir3_register::instr with ir3_register::def and
re-purposes ir3_register::instr. I haven't propagated this into common
helpers, like ssa(), because that would vastly increase the amount of
churn and the number of places that produce such instructions should be
limited -- only RA will create parallel copies and they will be
destroyed right after RA. In the future swz will have multiple
destinations too, but it will only be created after RA via parallel copy
lowering.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9842>

12 files changed:
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_context.c
src/freedreno/ir3/ir3_cp.c
src/freedreno/ir3/ir3_cp_postsched.c
src/freedreno/ir3/ir3_delay.c
src/freedreno/ir3/ir3_group.c
src/freedreno/ir3/ir3_postsched.c
src/freedreno/ir3/ir3_print.c
src/freedreno/ir3/ir3_ra.c
src/freedreno/ir3/ir3_ra.h
src/freedreno/ir3/tests/delay.c

index 3fdcdfe..7f89d51 100644 (file)
@@ -118,6 +118,7 @@ struct ir3_register {
                IR3_REG_SSA    = 0x4000,   /* 'instr' is ptr to assigning instr */
                IR3_REG_ARRAY  = 0x8000,
 
+               IR3_REG_DEST   = 0x10000,
        } flags;
 
        /* used for cat5 instructions, but also for internal/IR level
@@ -153,6 +154,12 @@ struct ir3_register {
                } array;
        };
 
+
+       /* For IR3_REG_DEST, pointer back to the instruction containing this
+        * register.
+        */
+       struct ir3_instruction *instr;
+
        /* For IR3_REG_SSA, src registers contain ptr back to assigning
         * instruction.
         *
@@ -160,7 +167,7 @@ struct ir3_register {
         * array access (although the net effect is the same, it points
         * back to a previous instruction that we depend on).
         */
-       struct ir3_instruction *instr;
+       struct ir3_register *def;
 };
 
 /*
@@ -525,7 +532,7 @@ struct ir3_array {
         * last read.  But all the writes that happen before that have
         * something depending on them
         */
-       struct ir3_instruction *last_write;
+       struct ir3_register *last_write;
 
        /* extra stuff used in RA pass: */
        unsigned base;      /* base vreg name */
@@ -985,9 +992,8 @@ static inline bool writes_pred(struct ir3_instruction *instr)
 /* TODO better name */
 static inline struct ir3_instruction *ssa(struct ir3_register *reg)
 {
-       if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
-               return reg->instr;
-       }
+       if ((reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) && reg->def)
+               return reg->def->instr;
        return NULL;
 }
 
@@ -1309,7 +1315,7 @@ ir3_try_swap_signedness(opc_t opc, bool *can_swap)
        if ((__instr)->regs_count) \
                for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
                        for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \
-                               if ((__srcreg = (__instr)->regs[__n + 1]))
+                               if ((__srcreg = (__instr)->regs[__n + 1]) && !(__srcreg->flags & IR3_REG_DEST))
 
 /* iterator for an instructions's sources (reg): */
 #define foreach_src(__srcreg, __instr) \
@@ -1331,7 +1337,7 @@ __ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
        if (n >= instr->regs_count)
                return &instr->deps[n - instr->regs_count];
        if (ssa(instr->regs[n]))
-               return &instr->regs[n]->instr;
+               return &instr->regs[n]->def->instr;
        return NULL;
 }
 
@@ -1513,16 +1519,17 @@ static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr,
        struct ir3_register *reg;
        if (src->regs[0]->flags & IR3_REG_HALF)
                flags |= IR3_REG_HALF;
-       reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags);
-       reg->instr = src;
+       reg = ir3_reg_create(instr, INVALID_REG, IR3_REG_SSA | flags);
+       reg->def = src->regs[0];
        reg->wrmask = src->regs[0]->wrmask;
        return reg;
 }
 
 static inline struct ir3_register * __ssa_dst(struct ir3_instruction *instr)
 {
-       struct ir3_register *reg = ir3_reg_create(instr, 0, 0);
-       reg->flags |= IR3_REG_SSA;
+       struct ir3_register *reg = ir3_reg_create(instr, INVALID_REG, 0);
+       reg->flags |= IR3_REG_SSA | IR3_REG_DEST;
+       reg->instr = instr;
        return reg;
 }
 
index 164f72d..f63a3ce 100644 (file)
@@ -648,7 +648,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                if (cond->opc == OPC_ABSNEG_S &&
                                cond->flags == 0 &&
                                (cond->regs[1]->flags & (IR3_REG_SNEG | IR3_REG_SABS)) == IR3_REG_SNEG) {
-                       cond = cond->regs[1]->instr;
+                       cond = cond->regs[1]->def->instr;
                }
 
                compile_assert(ctx, bs[1] == bs[2]);
@@ -2791,7 +2791,7 @@ resolve_phis(struct ir3_context *ctx, struct ir3_block *block)
                                if (get_block(ctx, nsrc->pred) == pred) {
                                        if (nsrc->src.ssa->parent_instr->type == nir_instr_type_ssa_undef) {
                                                /* Create an ir3 undef */
-                                               ir3_reg_create(phi, INVALID_REG, phi->regs[0]->flags);
+                                               ir3_reg_create(phi, INVALID_REG, phi->regs[0]->flags & ~IR3_REG_DEST);
                                        } else {
                                                struct ir3_instruction *src = ir3_get_src(ctx, &nsrc->src)[0];
                                                __ssa_src(phi, src, 0);
index fd49d60..59cc14e 100644 (file)
@@ -598,7 +598,7 @@ ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
        __ssa_dst(mov)->flags |= flags;
        src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
                        COND(address, IR3_REG_RELATIV) | flags);
-       src->instr = arr->last_write;
+       src->def = arr->last_write;
        src->size  = arr->length;
        src->array.id = arr->id;
        src->array.offset = n;
@@ -632,12 +632,12 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
                src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
 
                dst->flags |= IR3_REG_ARRAY;
-               dst->instr = arr->last_write;
+               dst->def = arr->last_write;
                dst->size = arr->length;
                dst->array.id = arr->id;
                dst->array.offset = n;
 
-               arr->last_write = src;
+               arr->last_write = dst;
 
                array_insert(block, block->keeps, src);
 
@@ -655,19 +655,20 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
        }
        mov->barrier_class = IR3_BARRIER_ARRAY_W;
        mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
-       dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
+       dst = ir3_reg_create(mov, 0, IR3_REG_DEST | IR3_REG_SSA | IR3_REG_ARRAY |
                        flags |
                        COND(address, IR3_REG_RELATIV));
-       dst->instr = arr->last_write;
+       dst->def = arr->last_write;
+       dst->instr = mov;
        dst->size  = arr->length;
        dst->array.id = arr->id;
        dst->array.offset = n;
-       ir3_reg_create(mov, 0, IR3_REG_SSA | flags)->instr = src;
+       ir3_reg_create(mov, 0, IR3_REG_SSA | flags)->def = src->regs[0];
 
        if (address)
                ir3_instr_set_address(mov, address);
 
-       arr->last_write = mov;
+       arr->last_write = dst;
 
        /* the array store may only matter to something in an earlier
         * block (ie. loops), but since arrays are not in SSA, depth
index 7e65228..e9899c1 100644 (file)
@@ -339,13 +339,13 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                                reg->array = src_reg->array;
                        }
                        reg->flags = new_flags;
-                       reg->instr = ssa(src_reg);
+                       reg->def = src_reg->def;
 
                        instr->barrier_class |= src->barrier_class;
                        instr->barrier_conflict |= src->barrier_conflict;
 
                        unuse(src);
-                       reg->instr->use_count++;
+                       reg->def->instr->use_count++;
 
                        return true;
                }
@@ -395,7 +395,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                         * address registers:
                         */
                        if ((src_reg->flags & IR3_REG_RELATIV) &&
-                                       conflicts(instr->address, reg->instr->address))
+                                       conflicts(instr->address, reg->def->instr->address))
                                return false;
 
                        /* This seems to be a hw bug, or something where the timings
@@ -430,7 +430,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                        instr->regs[n+1] = src_reg;
 
                        if (src_reg->flags & IR3_REG_RELATIV)
-                               ir3_instr_set_address(instr, reg->instr->address);
+                               ir3_instr_set_address(instr, reg->def->instr->address);
 
                        return true;
                }
index 65b1ea0..dc0e1c5 100644 (file)
@@ -50,7 +50,7 @@
 static bool
 has_conflicting_write(struct ir3_instruction *src,
                struct ir3_instruction *use,
-               struct ir3_instruction **def,
+               struct ir3_register **def,
                unsigned id, int offset)
 {
        assert(src->block == use->block);
@@ -99,7 +99,7 @@ has_conflicting_write(struct ir3_instruction *src,
                        return true;
 
                if (last_write)
-                       *def = instr;
+                       *def = dst;
 
                last_write = false;
        }
@@ -152,7 +152,7 @@ instr_cp_postsched(struct ir3_instruction *mov)
                if (is_meta(use))
                        continue;
 
-               struct ir3_instruction *def = src->instr;
+               struct ir3_register *def = src->def;
                if (has_conflicting_write(mov, use, &def, src->array.id, offset))
                        continue;
 
@@ -177,7 +177,7 @@ instr_cp_postsched(struct ir3_instruction *mov)
                        /* If we're sinking the array read past any writes, make
                         * sure to update it to point to the new previous write:
                         */
-                       use->regs[n + 1]->instr = def;
+                       use->regs[n + 1]->def = def;
 
                        removed = true;
                }
index c0615ff..73adfae 100644 (file)
@@ -182,10 +182,10 @@ delay_calc_srcn(struct ir3_block *block,
                foreach_src_n (src, n, assigner) {
                        unsigned d;
 
-                       if (!src->instr)
+                       if (!src->def)
                                continue;
 
-                       d = delay_calc_srcn(block, src->instr, consumer, srcn, soft, pred);
+                       d = delay_calc_srcn(block, src->def->instr, consumer, srcn, soft, pred);
 
                        /* A (rptN) instruction executes in consecutive cycles so
                         * it's outputs are written in successive cycles.  And
@@ -200,9 +200,9 @@ delay_calc_srcn(struct ir3_block *block,
                         * for src registers.  There is exactly one case, bary.f,
                         * which has a vecN (collect) src that is not (r)'d.
                         */
-                       if ((assigner->opc == OPC_META_SPLIT) && src->instr->repeat) {
+                       if ((assigner->opc == OPC_META_SPLIT) && src->def->instr->repeat) {
                                /* (rptN) assigner case: */
-                               d -= MIN2(d, src->instr->repeat - assigner->split.off);
+                               d -= MIN2(d, src->def->instr->repeat - assigner->split.off);
                        } else if ((assigner->opc == OPC_META_COLLECT) && consumer->repeat &&
                                        (consumer->regs[srcn]->flags & IR3_REG_R)) {
                                d -= MIN2(d, n);
@@ -328,8 +328,8 @@ ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
 
                if ((src->flags & IR3_REG_RELATIV) && !(src->flags & IR3_REG_CONST)) {
                        d = delay_calc_array(block, src->array.id, instr, i+1, soft, pred, 6);
-               } else if (src->instr) {
-                       d = delay_calc_srcn(block, src->instr, instr, i+1, soft, pred);
+               } else if (src->def) {
+                       d = delay_calc_srcn(block, src->def->instr, instr, i+1, soft, pred);
                }
 
                delay = MAX2(delay, d);
index f0bb646..6efaf29 100644 (file)
@@ -37,7 +37,7 @@ insert_mov(struct ir3_instruction *collect, int idx)
        struct ir3_instruction *mov = ir3_MOV(src->block, src,
                (collect->regs[idx+1]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
 
-       collect->regs[idx+1]->instr = mov;
+       collect->regs[idx+1]->def = mov->regs[0];
 
        /* if collect and src are in the same block, move the inserted mov
         * to just before the collect to avoid a use-before-def.  Otherwise
index fe68cfe..2ef06ee 100644 (file)
@@ -706,19 +706,19 @@ cleanup_self_movs(struct ir3 *ir)
                foreach_instr_safe (instr, &block->instr_list) {
 
                        foreach_src (reg, instr) {
-                               if (!reg->instr)
+                               if (!reg->def)
                                        continue;
 
-                               if (is_self_mov(reg->instr)) {
-                                       list_delinit(&reg->instr->node);
-                                       reg->instr = reg->instr->regs[1]->instr;
+                               if (is_self_mov(reg->def->instr)) {
+                                       list_delinit(&reg->def->instr->node);
+                                       reg->def = reg->def->instr->regs[1]->def;
                                }
                        }
 
                        for (unsigned i = 0; i < instr->deps_count; i++) {
                                if (instr->deps[i] && is_self_mov(instr->deps[i])) {
                                        list_delinit(&instr->deps[i]->node);
-                                       instr->deps[i] = instr->deps[i]->regs[1]->instr;
+                                       instr->deps[i] = instr->deps[i]->regs[1]->def->instr;
                                }
                        }
                }
index ff5c5bb..6ddb5d7 100644 (file)
@@ -191,14 +191,14 @@ static void print_reg_name(struct ir3_instruction *instr, struct ir3_register *r
                 * Note for array writes from another block, we aren't really
                 * sure who wrote it so skip trying to show this
                 */
-               if (reg->instr && (reg->instr->block == instr->block)) {
+               if (reg->def && (reg->def->instr->block == instr->block)) {
                        printf(SYN_ARRAY(", "));
-                       printf(SYN_SSA("ssa_%u"), reg->instr->serialno);
+                       printf(SYN_SSA("ssa_%u"), reg->def->instr->serialno);
                }
                printf(SYN_ARRAY("]"));
        } else if (reg->flags & IR3_REG_SSA) {
-               /* For dst regs, reg->instr will be NULL: */
-               printf(SYN_SSA("ssa_%u"), reg->instr ? reg->instr->serialno : instr->serialno);
+               /* For dst regs, reg->def will be NULL: */
+               printf(SYN_SSA("ssa_%u"), (reg->flags & IR3_REG_DEST) ? instr->serialno : reg->def->instr->serialno);
        } else if (reg->flags & IR3_REG_RELATIV) {
                if (reg->flags & IR3_REG_CONST)
                        printf(SYN_CONST("c<a0.x + %d>"), reg->array.offset);
@@ -320,13 +320,13 @@ print_instr(struct ir3_instruction *instr, int lvl)
                                printf(" %sp0.%c ("SYN_SSA("ssa_%u")"),",
                                                instr->cat0.inv1 ? "!" : "",
                                                "xyzw"[instr->cat0.comp1 & 0x3],
-                                               instr->regs[1]->instr->serialno);
+                                               instr->regs[1]->def->instr->serialno);
                        }
                        if (brinfo[instr->cat0.brtype].nsrc >= 2) {
                                printf(" %sp0.%c ("SYN_SSA("ssa_%u")"),",
                                                instr->cat0.inv2 ? "!" : "",
                                                "xyzw"[instr->cat0.comp2 & 0x3],
-                                               instr->regs[2]->instr->serialno);
+                                               instr->regs[2]->def->instr->serialno);
                        }
                }
                printf(" target=block%u", block_id(instr->cat0.target));
index 508793c..2c292ca 100644 (file)
@@ -154,10 +154,10 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
                 */
                foreach_src_n (src, n, instr) {
                        struct ir3_instruction *dd;
-                       if (!src->instr)
+                       if (!src->def)
                                continue;
 
-                       dd = get_definer(ctx, src->instr, &dsz, &doff);
+                       dd = get_definer(ctx, src->def->instr, &dsz, &doff);
 
                        if ((!d) || instr_before(dd, d)) {
                                d = dd;
@@ -224,7 +224,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
                struct ir3_instruction *dd;
                int dsz, doff;
 
-               dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
+               dd = get_definer(ctx, d->regs[1]->def->instr, &dsz, &doff);
 
                /* by definition, should come before: */
                ra_assert(ctx, instr_before(dd, d));
@@ -322,12 +322,12 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                 */
                if (ctx->scalar_pass) {
                        if (instr->opc == OPC_META_SPLIT) {
-                               instr->name = instr->regs[1]->instr->name + instr->split.off;
+                               instr->name = instr->regs[1]->def->instr->name + instr->split.off;
                                continue;
                        }
 
                        if (instr->opc == OPC_META_COLLECT) {
-                               instr->name = instr->regs[1]->instr->name;
+                               instr->name = instr->regs[1]->def->instr->name;
                                continue;
                        }
                }
@@ -477,7 +477,7 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data)
                        struct ir3_array *arr = ir3_lookup_array(ctx->ir, src->array.id);
                        src_n = arr->base + src->array.offset;
                } else {
-                       src_n = scalar_name(ctx, src->instr, 0);
+                       src_n = scalar_name(ctx, src->def->instr, 0);
                }
 
                unsigned reg = ra_get_node_reg(ctx->g, src_n);
@@ -1213,12 +1213,12 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                }
 
                foreach_src_n (reg, n, instr) {
-                       struct ir3_instruction *src = reg->instr;
+                       struct ir3_instruction *src = reg->def ? reg->def->instr : NULL;
 
                        if (src && should_assign(ctx, instr))
                                reg_assign(ctx, src->regs[0], src);
 
-                       /* Note: reg->instr could be null for IR3_REG_ARRAY */
+                       /* Note: reg->def could be null for IR3_REG_ARRAY */
                        if (((reg->flags & IR3_REG_ARRAY) && ctx->scalar_pass) ||
                                (src && should_assign(ctx, src))) {
                                reg_assign(ctx, instr->regs[n+1], src);
@@ -1466,9 +1466,9 @@ ra_precolor_assigned(struct ir3_ra_ctx *ctx)
                        precolor(ctx, instr);
 
                        foreach_src (src, instr) {
-                               if (!src->instr)
+                               if (!src->def)
                                        continue;
-                               precolor(ctx, src->instr);
+                               precolor(ctx, src->def->instr);
                        }
                }
        }
index 1380734..34f2549 100644 (file)
@@ -213,11 +213,11 @@ scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
        if (ctx->scalar_pass) {
                if (instr->opc == OPC_META_SPLIT) {
                        debug_assert(n == 0);     /* split results in a scalar */
-                       struct ir3_instruction *src = instr->regs[1]->instr;
+                       struct ir3_instruction *src = instr->regs[1]->def->instr;
                        return scalar_name(ctx, src, instr->split.off);
                } else if (instr->opc == OPC_META_COLLECT) {
                        debug_assert(n < (instr->regs_count + 1));
-                       struct ir3_instruction *src = instr->regs[n + 1]->instr;
+                       struct ir3_instruction *src = instr->regs[n + 1]->def->instr;
                        return scalar_name(ctx, src, 0);
                }
        } else {
@@ -347,8 +347,8 @@ __ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
                                __ra_itr_push(ctx, arr->base + reg->array.offset);
                                debug_assert(reg->array.offset < arr->length);
                        }
-               } else {
-                       foreach_name_n (name, i, ctx, reg->instr) {
+               } else if (reg->def) {
+                       foreach_name_n (name, i, ctx, reg->def->instr) {
                                /* split takes a src w/ wrmask potentially greater
                                 * than 0x1, but it really only cares about a single
                                 * component.  This shows up in splits coming out of
index 79a66d2..d1cff80 100644 (file)
@@ -148,7 +148,7 @@ regs_to_ssa(struct ir3 *ir)
                                src = collect;
                        }
 
-                       reg->instr = src;
+                       reg->def = src->regs[0];
                        reg->flags |= IR3_REG_SSA;
                }
 
@@ -168,6 +168,7 @@ regs_to_ssa(struct ir3 *ir)
                                regfile[regn(instr->regs[0]) + i] = split;
                        }
                } else {
+                       instr->regs[0]->instr = instr;
                        regfile[regn(instr->regs[0])] = instr;
                }
        }