From dd90273aaacb9093334aa3db95e298026616b6dd Mon Sep 17 00:00:00 2001
From: =?utf8?q?Timur=20Krist=C3=B3f?=
Date: Wed, 20 Apr 2022 17:21:11 +0200
Subject: [PATCH] aco: Optimize MUBUF 0 offset when idxen is also being used.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Now that we added an index src to the NIR intrinsic,
it can happen that these generate MUBUF instructions which
have both an index and an offset.

Extend this ACO optimization to the case when idxen
is used.

Signed-off-by: Timur Kristóf
Reviewed-by: Georg Lehmann
Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 96d5edd..48f429f 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1424,8 +1424,16 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
              * MUBUF accesses. */
             bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->gfx_level < GFX9;
 
-            if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) &&
-                mubuf.offset + info.val < 4096) {
+            if (mubuf.offen && mubuf.idxen && i == 1 && info.is_vec() &&
+                info.instr->operands.size() == 2 && info.instr->operands[0].isTemp() &&
+                info.instr->operands[0].regClass() == v1 && info.instr->operands[1].isConstant() &&
+                mubuf.offset + info.instr->operands[1].constantValue() < 4096) {
+               instr->operands[1] = info.instr->operands[0];
+               mubuf.offset += info.instr->operands[1].constantValue();
+               mubuf.offen = false;
+               continue;
+            } else if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) &&
+                       mubuf.offset + info.val < 4096) {
                assert(!mubuf.idxen);
                instr->operands[1] = Operand(v1);
                mubuf.offset += info.val;
-- 
2.7.4