r300: fix negate mask computation when merging movs
author: Pavel Ondračka <pavel.ondracka@gmail.com>
Tue, 12 Jul 2022 08:17:13 +0000 (10:17 +0200)
committer: Pavel Ondračka <pavel.ondracka@gmail.com>
Sun, 21 Aug 2022 07:49:35 +0000 (09:49 +0200)
The main problem here is we can have a negate bit set for an unused
channel, so we can't just OR together the negates when channel merging.
Right now the bug is hidden because of how we order the passes, but
that will change in a later commit. Add some helpers for merging the
negates; they will also be used more in later commits. As a bonus,
construct the new source separately and only rewrite the original
instructions after checking that the final swizzle is valid.

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17560>

src/gallium/drivers/r300/compiler/radeon_optimize.c

index 8d12098..fef370d 100644 (file)
@@ -900,12 +900,28 @@ static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) {
        return new_swz;
 }
 
+/* Sets negate to 0 for unused channels. */
+static unsigned int clean_negate(struct rc_src_register src)
+{
+       unsigned int new_negate = 0;
+       for (unsigned int chan = 0; chan < 4; chan++) {
+               unsigned int swz = GET_SWZ(src.Swizzle, chan);
+               if (swz != RC_SWIZZLE_UNUSED)
+                       new_negate |= src.Negate & (1 << chan);
+       }
+       return new_negate;
+}
+
+static unsigned int merge_negates(struct rc_src_register src1, struct rc_src_register src2)
+{
+       return clean_negate(src1) | clean_negate(src2);
+}
+
 static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
 {
        unsigned int orig_dst_reg = inst->U.I.DstReg.Index;
        unsigned int orig_dst_file = inst->U.I.DstReg.File;
        unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask;
-       unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index;
        unsigned int orig_src_file = inst->U.I.SrcReg[0].File;
 
        struct rc_instruction * cur = inst;
@@ -944,17 +960,19 @@ static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
                                orig_src_file == RC_FILE_NONE) {
                                cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
 
+                               struct rc_src_register src;
                                if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) {
-                                       cur->U.I.SrcReg[0].File = orig_src_file;
-                                       cur->U.I.SrcReg[0].Index = orig_src_reg;
-                                       cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs;
-                                       cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr;
+                                       src = inst->U.I.SrcReg[0];
+                               } else {
+                                       src = cur->U.I.SrcReg[0];
                                }
-                               cur->U.I.SrcReg[0].Swizzle =
-                                       merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
-                                                       inst->U.I.SrcReg[0].Swizzle);
-
-                               cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate;
+                               src.Swizzle = merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
+                                                               inst->U.I.SrcReg[0].Swizzle);
+                               src.Negate = merge_negates(inst->U.I.SrcReg[0], cur->U.I.SrcReg[0]);
+                               if (!c->SwizzleCaps->IsNative(RC_OPCODE_MOV, src))
+                                       return 0;
+                               cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
+                               cur->U.I.SrcReg[0] = src;
 
                                /* finally delete the original mov */
                                rc_remove_instruction(inst);