r300: be more careful with presubtract and non-native swizzles
authorPavel Ondračka <pavel.ondracka@gmail.com>
Tue, 15 Nov 2022 15:43:25 +0000 (16:43 +0100)
committerMarge Bot <emma+marge@anholt.net>
Wed, 16 Nov 2022 19:51:47 +0000 (19:51 +0000)
The problematic scenario is when we have the same source used by both
normal and presubtract argument. We check that case currently and count
the source only once. However if one of the arguments uses a non-native
swizzle, we have to rewrite it later and the source changes. Therefore
we end with too many sources and fail later during pair translation.

Example:
ADD temp[21].xy, temp[20].xy__, temp[17].xy__;
MAD temp[22].xy, temp[17].zw__, temp[11].xy__, temp[21].xy__;

will get converted to

MAD temp[22].xy, temp[17].zw__, temp[11].xy__, (temp[17] + temp[20]).xy__;

however after dataflow swizzles pass we end with

MOV temp[3].x, temp[17].z___;
MOV temp[3].y, temp[17]._w__;
MAD temp[22].xy, temp[3].xy__, temp[11].xy__, (temp[17] + temp[20]).xy__;

Just skip the "don't count the same source twice" optimization when a
non-native swizzle is used to fix 2 dEQP atan2 tests.

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip@gawin.net>
Tested-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19764>

src/gallium/drivers/r300/ci/r300-r480-fails.txt
src/gallium/drivers/r300/compiler/radeon_compiler_util.c
src/gallium/drivers/r300/compiler/radeon_compiler_util.h
src/gallium/drivers/r300/compiler/radeon_dataflow.c
src/gallium/drivers/r300/compiler/radeon_dataflow.h
src/gallium/drivers/r300/compiler/radeon_optimize.c
src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c

index ac863a4..e891ed5 100644 (file)
@@ -622,8 +622,6 @@ dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.asin.highp_vec2_fr
 dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.asin.mediump_vec2_fragment,Fail
 dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan.highp_vec2_fragment,Fail
 dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan.mediump_vec2_fragment,Fail
-dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec2_fragment,Fail
-dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec2_fragment,Fail
 dEQP-GLES2.functional.shaders.random.all_features.fragment.1,Fail
 dEQP-GLES2.functional.shaders.random.all_features.fragment.5,Fail
 dEQP-GLES2.functional.shaders.random.all_features.fragment.6,Fail
index a4bc60f..c2a2b39 100644 (file)
@@ -33,6 +33,7 @@
 
 #include "radeon_compiler.h"
 #include "radeon_dataflow.h"
+#include "r300_fragprog_swizzle.h"
 /**
  */
 unsigned int rc_swizzle_to_writemask(unsigned int swz)
@@ -383,6 +384,7 @@ struct src_select {
        rc_register_file File;
        int Index;
        unsigned int SrcType;
+       unsigned int Swizzle;
 };
 
 struct can_use_presub_data {
@@ -396,14 +398,15 @@ static void can_use_presub_data_add_select(
        struct can_use_presub_data * data,
        rc_register_file file,
        unsigned int index,
-       unsigned int src_type)
+       unsigned int swizzle)
 {
        struct src_select * select;
 
        select = &data->Selects[data->SelectCount++];
        select->File = file;
        select->Index = index;
-       select->SrcType = src_type;
+       select->SrcType = rc_source_type_swz(swizzle);
+       select->Swizzle = swizzle;
 }
 
 /**
@@ -426,10 +429,11 @@ static void can_use_presub_read_cb(
                return;
 
        can_use_presub_data_add_select(d, src->File, src->Index,
-                                       rc_source_type_swz(src->Swizzle));
+                                       src->Swizzle);
 }
 
 unsigned int rc_inst_can_use_presub(
+       struct radeon_compiler * c,
        struct rc_instruction * inst,
        rc_presubtract_op presub_op,
        unsigned int presub_writemask,
@@ -473,14 +477,14 @@ unsigned int rc_inst_can_use_presub(
        can_use_presub_data_add_select(&d,
                presub_src0->File,
                presub_src0->Index,
-               src_type0);
+               presub_src0->Swizzle);
 
        if (num_presub_srcs > 1) {
                src_type1 = rc_source_type_swz(presub_src1->Swizzle);
                can_use_presub_data_add_select(&d,
                        presub_src1->File,
                        presub_src1->Index,
-                       src_type1);
+                       presub_src1->Swizzle);
 
                /* Even if both of the presub sources read from the same
                 * register, we still need to use 2 different source selects
@@ -504,6 +508,12 @@ unsigned int rc_inst_can_use_presub(
                unsigned int j;
                unsigned int src_type = d.Selects[i].SrcType;
                for (j = i + 1; j < d.SelectCount; j++) {
+                       /* Even if the sources are the same now, they will not be the
+                        * same later, if we have to rewrite some non-native swizzle. */
+                       if(!c->is_r500 && (
+                               !r300_swizzle_is_native_basic(d.Selects[i].Swizzle) ||
+                               !r300_swizzle_is_native_basic(d.Selects[j].Swizzle)))
+                               continue;
                        if (d.Selects[i].File == d.Selects[j].File
                            && d.Selects[i].Index == d.Selects[j].Index) {
                                src_type &= ~d.Selects[j].SrcType;
index 49b3e66..7c1d6bb 100644 (file)
@@ -87,6 +87,7 @@ unsigned int rc_source_type_swz(unsigned int swizzle);
 unsigned int rc_source_type_mask(unsigned int mask);
 
 unsigned int rc_inst_can_use_presub(
+       struct radeon_compiler * c,
        struct rc_instruction * inst,
        rc_presubtract_op presub_op,
        unsigned int presub_writemask,
index d761ae9..94ee921 100644 (file)
@@ -849,6 +849,7 @@ static void init_get_readers_callback_data(
        rc_pair_read_arg_fn read_pair_cb,
        rc_read_write_mask_fn write_cb)
 {
+       reader_data->C = c;
        reader_data->Abort = 0;
        reader_data->ReaderCount = 0;
        reader_data->ReadersReserved = 0;
index bb8d482..0c7bf8a 100644 (file)
@@ -86,6 +86,8 @@ struct rc_reader {
 };
 
 struct rc_reader_data {
+       struct radeon_compiler * C;
+
        unsigned int Abort;
        unsigned int AbortOnRead;
        unsigned int AbortOnWrite;
index b603b2c..33350b7 100644 (file)
@@ -71,7 +71,8 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
        rc_register_file file = src->File;
        struct rc_reader_data * reader_data = data;
 
-       if(!rc_inst_can_use_presub(inst,
+       if(!rc_inst_can_use_presub(reader_data->C,
+                               inst,
                                reader_data->Writer->U.I.PreSub.Opcode,
                                rc_swizzle_to_writemask(src->Swizzle),
                                src,
@@ -455,7 +456,9 @@ static void presub_scan_read(
        struct rc_reader_data * reader_data = data;
        rc_presubtract_op * presub_opcode = reader_data->CbData;
 
-       if (!rc_inst_can_use_presub(inst, *presub_opcode,
+       if (!rc_inst_can_use_presub(reader_data->C,
+                       inst,
+                       *presub_opcode,
                        reader_data->Writer->U.I.DstReg.WriteMask,
                        src,
                        &reader_data->Writer->U.I.SrcReg[0],
index 3e97594..bcdbbfc 100644 (file)
@@ -45,11 +45,14 @@ static void test_rc_inst_can_use_presub(
        struct rc_instruction add_inst, replace_inst;
        int ret;
 
+       struct r300_fragment_program_compiler c = {};
+       init_compiler(&c.Base, RC_FRAGMENT_PROGRAM, 0, 0);
+
        test_begin(result);
        init_rc_normal_instruction(&add_inst, add_str);
        init_rc_normal_instruction(&replace_inst, replace_str);
 
-       ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0,
+       ret = rc_inst_can_use_presub(&c.Base, &replace_inst, RC_PRESUB_ADD, 0,
                        &replace_inst.U.I.SrcReg[0],
                        &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]);