pan/bi: Force u32 for flat varyings
author Alyssa Rosenzweig <alyssa@collabora.com>
Wed, 2 Jun 2021 18:52:56 +0000 (14:52 -0400)
committer Marge Bot <eric+marge@anholt.net>
Thu, 10 Jun 2021 18:06:10 +0000 (18:06 +0000)
The GLSL compilers pack flat varyings together with no regard to type,
under the assumption that the backend can deal with it. I guess we can
deal with it then... Fixes failures in
dEQP-GLES31.functional.separate_shader.random.*

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11123>

src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/lib/pan_shader.c

index cc23dee..0e6c1c8 100644 (file)
@@ -1,6 +1,2 @@
 dEQP-GLES31.functional.layout_binding.image.image2d.vertex_binding_max_array,Fail
 dEQP-GLES31.functional.layout_binding.image.image3d.vertex_binding_max_array,Fail
-dEQP-GLES31.functional.separate_shader.random.23,Fail
-dEQP-GLES31.functional.separate_shader.random.35,Fail
-dEQP-GLES31.functional.separate_shader.random.68,Fail
-dEQP-GLES31.functional.separate_shader.random.79,Fail
index fee43f3..90d396a 100644 (file)
@@ -257,6 +257,8 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
         enum bi_vecsize vecsize = (instr->num_components + component - 1);
         bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader);
 
+        unsigned sz = nir_dest_bit_size(instr->dest);
+
         if (smooth) {
                 nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
                 assert(parent);
@@ -264,13 +266,12 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
                 sample = bi_interp_for_intrinsic(parent->intrinsic);
                 src0 = bi_varying_src0_for_barycentric(b, parent);
 
-                unsigned sz = nir_dest_bit_size(instr->dest);
                 assert(sz == 16 || sz == 32);
-
                 regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16
                         : BI_REGISTER_FORMAT_F32;
         } else {
-                regfmt = bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr));
+                assert(sz == 32);
+                regfmt = BI_REGISTER_FORMAT_U32;
         }
 
         nir_src *offset = nir_get_io_offset_src(instr);
@@ -602,8 +603,14 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
 static void
 bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
 {
-        nir_alu_type T = nir_intrinsic_src_type(instr);
-        enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
+        /* In principle we can do better for 16-bit. At the moment we require
+         * 32-bit to permit the use of .auto, in order to force .u32 for flat
+         * varyings, to handle internal TGSI shaders that set flat in the VS
+         * but smooth in the FS */
+
+        ASSERTED nir_alu_type T = nir_intrinsic_src_type(instr);
+        assert(nir_alu_type_get_type_size(T) == 32);
+        enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO;
 
         unsigned imm_index = 0;
         bool immediate = bi_is_intr_immediate(instr, &imm_index, 16);
index 4cf91c7..3b302db 100644 (file)
@@ -83,7 +83,7 @@ varying_format(nir_alu_type t, unsigned ncomps)
 static void
 collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
                  struct pan_shader_varying *varyings,
-                 unsigned *varying_count)
+                 unsigned *varying_count, bool is_bifrost)
 {
         *varying_count = 0;
 
@@ -113,9 +113,12 @@ collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
                 unsigned chan = comps[loc];
 
                 nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
-
                 type = nir_alu_type_get_base_type(type);
 
+                /* Can't do type conversion since GLSL IR packs in funny ways */
+                if (is_bifrost && var->data.interpolation == INTERP_MODE_FLAT)
+                        type = nir_type_uint;
+
                 /* Demote to fp16 where possible. int16 varyings are TODO as the hw
                  * will saturate instead of wrap which is not conformant, so we need to
                  * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
@@ -202,7 +205,7 @@ pan_shader_compile(const struct panfrost_device *dev,
                 info->vs.writes_point_size =
                         s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
                 collect_varyings(s, nir_var_shader_out, info->varyings.output,
-                                 &info->varyings.output_count);
+                                 &info->varyings.output_count, pan_is_bifrost(dev));
                 break;
         case MESA_SHADER_FRAGMENT:
                 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
@@ -262,7 +265,7 @@ pan_shader_compile(const struct panfrost_device *dev,
                 info->fs.reads_helper_invocation =
                         BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
                 collect_varyings(s, nir_var_shader_in, info->varyings.input,
-                                 &info->varyings.input_count);
+                                 &info->varyings.input_count, pan_is_bifrost(dev));
                 break;
         case MESA_SHADER_COMPUTE:
                 info->wls_size = s->info.shared_size;