From 890de1a43603c98b5788f9867ff0793f117b5f4b Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 22 Feb 2021 15:00:55 +0100 Subject: [PATCH] ir3/delay: Fix full->half and half->full delay The current compiler never does this, but the new compiler will start to in mergedregs mode. There is an extra penalty for this. Part-of: --- src/freedreno/ir3/ir3_delay.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 73adfae..8a76601 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -88,12 +88,25 @@ ir3_delayslots(struct ir3_instruction *assigner, if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || is_mem(consumer)) { return 6; - } else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && - (n == 3)) { - /* special case, 3rd src to cat3 not required on first cycle */ - return 1; } else { - return 3; + /* assigner and consumer are both alu */ + assert(n > 0); + + /* In mergedregs mode, there is an extra 2-cycle penalty when half of + * a full-reg is read as a half-reg or when a half-reg is read as a + * full-reg. + */ + bool mismatched_half = + (assigner->regs[0]->flags & IR3_REG_HALF) != + (consumer->regs[n - 1]->flags & IR3_REG_HALF); + unsigned penalty = mismatched_half ? 2 : 0; + if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && + (n == 3)) { + /* special case, 3rd src to cat3 not required on first cycle */ + return 1 + penalty; + } else { + return 3 + penalty; + } } } -- 2.7.4