r300: merge MOVs into ADD using the 0 swizzle
author Pavel Ondračka <pavel.ondracka@gmail.com>
Tue, 12 Jul 2022 14:04:22 +0000 (16:04 +0200)
committer Pavel Ondračka <pavel.ondracka@gmail.com>
Sun, 21 Aug 2022 07:52:31 +0000 (09:52 +0200)
Shader-db stats with RV530:
total instructions in shared programs: 169509 -> 166013 (-2.06%)
instructions in affected programs: 99126 -> 95630 (-3.53%)
total presub in shared programs: 10975 -> 10758 (-1.98%)
presub in affected programs: 744 -> 527 (-29.17%)
total temps in shared programs: 21722 -> 21649 (-0.34%)
temps in affected programs: 1350 -> 1277 (-5.41%)

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17560>

src/gallium/drivers/r300/compiler/radeon_optimize.c

index 14d8eb6..b13c863 100644 (file)
@@ -959,14 +959,43 @@ static bool merge_movs(
                src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
                                                inst->U.I.SrcReg[0].Swizzle);
                src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]);
-               if (!c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src))
-                       return false;
-               cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
-               cur->U.I.SrcReg[0] = src;
-               rc_remove_instruction(inst);
-               return true;
+               if (c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src)) {
+                       cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
+                       cur->U.I.SrcReg[0] = src;
+                       rc_remove_instruction(inst);
+                       return true;
+               }
        }
-       return false;
+
+       /* Otherwise, we can convert the MOVs into ADD.
+        *
+        * For example
+        *   MOV temp[0].x const[0].x
+        *   MOV temp[0].y input[0].y
+        *
+        * becomes
+        *   ADD temp[0].xy const[0].x0 input[0].0y
+        */
+       unsigned wmask = cur->U.I.DstReg.WriteMask | orig_dst_wmask;
+       struct rc_src_register src0 = inst->U.I.SrcReg[0];
+       struct rc_src_register src1 = cur->U.I.SrcReg[0];
+
+       src0.Swizzle = fill_swizzle(src0.Swizzle,
+                               wmask, RC_SWIZZLE_ZERO);
+       src1.Swizzle = fill_swizzle(src1.Swizzle,
+                               wmask, RC_SWIZZLE_ZERO);
+       if (!c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src0) ||
+               !c->SwizzleCaps->IsNative(RC_OPCODE_ADD, src1))
+               return false;
+
+       cur->U.I.DstReg.WriteMask = wmask;
+       cur->U.I.Opcode = RC_OPCODE_ADD;
+       cur->U.I.SrcReg[0] = src0;
+       cur->U.I.SrcReg[1] = src1;
+
+       /* finally delete the original mov */
+       rc_remove_instruction(inst);
+       return true;
 }
 
 static bool inst_combination(