From 077d07a983eceefca990ba78a0d8ed5c85b5c963 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 2 Dec 2021 14:48:08 +0100 Subject: [PATCH] ir3/ra: Fix tied destination handling with multiple destinations Before, we were careful to 1. Get the source physreg. 2. Allocate the destination. 3. Insert a copy with the source being the physreg from step 1. and this guaranteed that if the tied source were moved in step 2 we'd still insert a copy from the correct place. However this won't work with multiple destinations because an earlier destination could've already moved the tied source around. Instead flip steps 2 and 3 (we'll insert the copy before we allocate the interval, but that's ok) and run the first two steps in a separate loop before any destinations are allocated. Part-of: --- src/freedreno/ir3/ir3_ra.c | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 2897163..72fd22c 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -1254,6 +1254,38 @@ allocate_dst_fixed(struct ra_ctx *ctx, struct ir3_register *dst, interval->physreg_end = physreg + reg_size(dst); } +/* If a tied destination interferes with its source register, we have to insert + * a copy beforehand to copy the source to the destination. Because we are using + * the parallel_copies array and not creating a separate copy, this copy will + * happen in parallel with any shuffling around of the tied source, so we have + * to copy the source *as it exists before it is shuffled around*. We do this by + * inserting the copy early, before any other copies are inserted. We don't + * actually know the destination of the copy, but that's ok because the + * dst_interval will be filled out later. + */ +static void +insert_tied_dst_copy(struct ra_ctx *ctx, struct ir3_register *dst) +{ + struct ir3_register *tied = dst->tied; + + if (!tied) + return; + + struct ra_interval *tied_interval = &ctx->intervals[tied->def->name]; + struct ra_interval *dst_interval = &ctx->intervals[dst->name]; + + if (tied_interval->is_killed) + return; + + physreg_t tied_physreg = ra_interval_get_physreg(tied_interval); + + array_insert(ctx, ctx->parallel_copies, + (struct ra_parallel_copy){ + .interval = dst_interval, + .src = tied_physreg, + }); +} + static void allocate_dst(struct ra_ctx *ctx, struct ir3_register *dst) { @@ -1262,8 +1294,6 @@ allocate_dst(struct ra_ctx *ctx, struct ir3_register *dst) struct ir3_register *tied = dst->tied; if (tied) { struct ra_interval *tied_interval = &ctx->intervals[tied->def->name]; - struct ra_interval *dst_interval = &ctx->intervals[dst->name]; - physreg_t tied_physreg = ra_interval_get_physreg(tied_interval); if (tied_interval->is_killed) { /* The easy case: the source is killed, so we can just reuse it * for the destination. @@ -1277,11 +1307,6 @@ allocate_dst(struct ra_ctx *ctx, struct ir3_register *dst) */ physreg_t physreg = get_reg(ctx, file, dst, true); allocate_dst_fixed(ctx, dst, physreg); - array_insert(ctx, ctx->parallel_copies, - (struct ra_parallel_copy){ - .interval = dst_interval, - .src = tied_physreg, - }); } return; @@ -1361,6 +1386,11 @@ handle_normal_instr(struct ra_ctx *ctx, struct ir3_instruction *instr) mark_src_killed(ctx, src); } + /* Pre-insert tied dst copies. */ + ra_foreach_dst (dst, instr) { + insert_tied_dst_copy(ctx, dst); + } + /* Allocate the destination. */ ra_foreach_dst (dst, instr) { allocate_dst(ctx, dst); -- 2.7.4