From 775e42e6b841c76337af9977f55d0ec74df3283d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Pavel=20Ondra=C4=8Dka?= Date: Wed, 26 Apr 2023 13:38:41 +0200 Subject: [PATCH] r300: remove simple duplicate ARL instructions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Removes duplicate ARL reads from the same source when the original ADDR register is still live. This is the remaining low-hanging fruit from #7723 Should account for most of the potential improvements and is also trivial as no source or destination rewrite is needed. RV530: total instructions in shared programs: 132447 -> 131488 (-0.72%) instructions in affected programs: 33396 -> 32437 (-2.87%) helped: 331 HURT: 0 total temps in shared programs: 17035 -> 17015 (-0.12%) temps in affected programs: 361 -> 341 (-5.54%) helped: 30 HURT: 10 RV370: total instructions in shared programs: 83555 -> 82659 (-1.07%) instructions in affected programs: 28310 -> 27414 (-3.16%) helped: 312 HURT: 0 total temps in shared programs: 12418 -> 12426 (0.06%) temps in affected programs: 302 -> 310 (2.65%) helped: 21 HURT: 29 Signed-off-by: Pavel Ondračka Reviewed-by: Filip Gawin Part-of: --- .../drivers/r300/compiler/radeon_optimize.c | 54 +++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index f216998..475d896 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -1331,6 +1331,49 @@ static void merge_channels(struct radeon_compiler * c, struct rc_instruction * i } } +/** + * Searches for duplicate ARLs + * + * Only a very trivial case is now optimized where if a second ARL is detected which reads from + * the same register as the first one and source is the same, just remove the second one. 
+ */
+static void merge_ARL(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	unsigned int ARL_src_reg = inst->U.I.SrcReg[0].Index;
+	unsigned int ARL_src_file = inst->U.I.SrcReg[0].File;
+	unsigned int ARL_src_swizzle = inst->U.I.SrcReg[0].Swizzle;
+	/* Channel mask computed from the ARL source swizzle; used below to
+	 * test whether a later write touches a channel this ARL reads.
+	 */
+	unsigned int ARL_src_mask = rc_swizzle_to_writemask(ARL_src_swizzle);
+
+	/* Scan forward starting at the instruction after inst. The
+	 * &c->Program.Instructions terminator ends the walk and is never
+	 * examined as if it were a real instruction.
+	 */
+	struct rc_instruction * cur = inst->Next;
+	while (cur != &c->Program.Instructions) {
+		/* Remember the successor first: cur may be removed below, and
+		 * every remaining instruction must still be examined.
+		 */
+		struct rc_instruction * next = cur->Next;
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);
+
+		/* Keep it simple for now and stop when encountering any
+		 * control flow.
+		 */
+		if (opcode->IsFlowControl)
+			return;
+
+		/* Stop when the original source is overwritten, i.e. when the
+		 * destination is the same register and its write mask overlaps
+		 * the channels selected by the ARL source swizzle.
+		 */
+		if (ARL_src_reg == cur->U.I.DstReg.Index &&
+		    ARL_src_file == cur->U.I.DstReg.File &&
+		    (cur->U.I.DstReg.WriteMask & ARL_src_mask))
+			return;
+
+		if (cur->U.I.Opcode == RC_OPCODE_ARL) {
+			/* NOTE(review): only file, index and swizzle are compared;
+			 * confirm that other source modifiers cannot differ here.
+			 */
+			if (ARL_src_reg != cur->U.I.SrcReg[0].Index ||
+			    ARL_src_file != cur->U.I.SrcReg[0].File ||
+			    ARL_src_swizzle != cur->U.I.SrcReg[0].Swizzle) {
+				/* A different address load follows; stop merging. */
+				return;
+			}
+
+			/* Same source as the first ARL: this load is redundant. */
+			rc_remove_instruction(cur);
+		}
+
+		cur = next;
+	}
+}
+
 void rc_optimize(struct radeon_compiler * c, void *user) { struct rc_instruction * inst = c->Program.Instructions.Next; @@ -1351,7 +1394,7 @@ void rc_optimize(struct radeon_compiler * c, void *user) } /* Merge MOVs to same source in different channels using the constant - * swizzles. + * swizzle. */ if (c->is_r500 || c->type == RC_VERTEX_PROGRAM) { inst = c->Program.Instructions.Next; @@ -1384,7 +1427,16 @@ void rc_optimize(struct radeon_compiler * c, void *user) peephole(c, cur); } + if (!c->has_omod) { + inst = c->Program.Instructions.Next; + while (inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_ARL) { + merge_ARL(c, cur); + } + } return; } -- 2.7.4