r300: don't use abs twice in transform_TRUNC on older hardware
author: Filip Gawin <filip@gawin.net>
Thu, 17 Nov 2022 12:50:44 +0000 (13:50 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Thu, 17 Nov 2022 21:57:38 +0000 (21:57 +0000)
r400:
total instructions in shared programs: 27594 -> 27523 (-0.26%)
instructions in affected programs: 3594 -> 3523 (-1.98%)
helped: 61
HURT: 0
helped stats (abs) min: 1 max: 4 x̄: 1.16 x̃: 1
helped stats (rel) min: 1.16% max: 8.70% x̄: 2.55% x̃: 1.64%
95% mean confidence interval for instructions value: -1.29 -1.04
95% mean confidence interval for instructions %-change: -3.13% -1.97%
Instructions are helped.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19807>

src/gallium/drivers/r300/compiler/radeon_program_alu.c

index 5d76e5b..5e32e7b 100644 (file)
@@ -294,12 +294,30 @@ static void transform_TRUNC(struct radeon_compiler* c,
         * The multiplication by sgn(x) can be simplified using CMP:
         *   y * sgn(x) = (x < 0 ? -y : y)
         */
+        
+       struct rc_src_register abs;
+       
+       if (c->is_r500) {
+               abs = absolute(inst->U.I.SrcReg[0]);
+       } else {
+               /* abs isn't free on r300/r400, so we want
+                * to avoid doing it twice
+                */
+               int tmp = rc_find_free_temporary(c);
+
+               emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, dstregtmpmask(tmp, RC_MASK_XYZW),
+                         srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW),
+                     negate(srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW)));
+               abs = srcregswz(RC_FILE_TEMPORARY, tmp, inst->U.I.SrcReg[0].Swizzle);
+
+       }
        struct rc_dst_register dst = try_to_reuse_dst(c, inst);
-       emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, absolute(inst->U.I.SrcReg[0]));
-       emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, absolute(inst->U.I.SrcReg[0]),
+       emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, abs);
+       emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, abs,
              negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
        emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0],
              negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index));
+
        rc_remove_instruction(inst);
 }