From 63f353b45605379b8b81754543907a6b0fc3afdd Mon Sep 17 00:00:00 2001 From: Filip Gawin Date: Thu, 17 Nov 2022 13:50:44 +0100 Subject: [PATCH] r300: don't use abs twice in transform_TRUNC on older hardware MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit r400: total instructions in shared programs: 27594 -> 27523 (-0.26%) instructions in affected programs: 3594 -> 3523 (-1.98%) helped: 61 HURT: 0 helped stats (abs) min: 1 max: 4 x̄: 1.16 x̃: 1 helped stats (rel) min: 1.16% max: 8.70% x̄: 2.55% x̃: 1.64% 95% mean confidence interval for instructions value: -1.29 -1.04 95% mean confidence interval for instructions %-change: -3.13% -1.97% Instructions are helped. Part-of: --- .../drivers/r300/compiler/radeon_program_alu.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c index 5d76e5b..5e32e7b 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -294,12 +294,30 @@ static void transform_TRUNC(struct radeon_compiler* c, * The multiplication by sgn(x) can be simplified using CMP: * y * sgn(x) = (x < 0 ? -y : y) */ + + struct rc_src_register abs; + + if (c->is_r500) { + abs = absolute(inst->U.I.SrcReg[0]); + } else { + /* abs isn't free on r300/r400, so we want + * to avoid doing it twice + */ + int tmp = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, dstregtmpmask(tmp, RC_MASK_XYZW), + srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW), + negate(srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW))); + abs = srcregswz(RC_FILE_TEMPORARY, tmp, inst->U.I.SrcReg[0].Swizzle); + + } struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, absolute(inst->U.I.SrcReg[0])); - emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, absolute(inst->U.I.SrcReg[0]), + emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, abs); + emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, abs, negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); } -- 2.7.4