pan/mdg: Emulate 8-bit with the 16-bit pipe
author Alyssa Rosenzweig <alyssa@collabora.com>
Fri, 28 Oct 2022 01:28:34 +0000 (21:28 -0400)
committer Marge Bot <emma+marge@anholt.net>
Thu, 1 Dec 2022 00:52:53 +0000 (00:52 +0000)
We don't care to support i8vec16, we just need a bit of 8-bit support to
implement format packing/unpacking in blend shaders. We're already doing
this by using the 16-bit pipe, we just need to commit to it all the way
-- reporting the correct sizes in max_bitsize_for_alu so the mask
packing logic works as intended -- and dropping the imov-specific hack
that was introduced to work around a similar class of bugs.

With the previous patch, fixes:

   dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.1

Fixes: 39e4b7279dc ("pan/midg: Fix swizzling on 8-bit sources")
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19763>

src/panfrost/ci/panfrost-t860-fails.txt
src/panfrost/midgard/midgard_compile.c
src/panfrost/midgard/midgard_emit.c
src/panfrost/midgard/mir.c

index 8190d96..9c1d19c 100644 (file)
@@ -4,11 +4,9 @@ dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffer
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.3,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.4,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.5,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.7,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.8,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.9,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.10,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.11,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.12,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.13,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.14,Fail
@@ -17,7 +15,6 @@ dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffer
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.17,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.18,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.19,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.1,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.2,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.10,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.11,Fail
@@ -25,7 +22,6 @@ dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.12,F
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.15,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.16,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.17,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.18,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_x,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_y,Fail
index 436795d..9ebddce 100644 (file)
@@ -2650,6 +2650,9 @@ max_bitsize_for_alu(midgard_instruction *ins)
         unsigned dst_bitsize = nir_alu_type_get_type_size(ins->dest_type);
         max_bitsize = MAX2(dst_bitsize, max_bitsize);
 
+        /* We emulate 8-bit as 16-bit for simplicity of packing */
+        max_bitsize = MAX2(max_bitsize, 16);
+
         /* We don't have fp16 LUTs, so we'll want to emit code like:
          *
          *      vlut.fsinr hr0, hr0
index 52dd4a8..45b23db 100644 (file)
@@ -340,12 +340,6 @@ mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu)
                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
                 assert((sz == base_size) || (sz == base_size / 2));
 
-                /* Promote 8bit moves to 16bit ones so we can support any swizzles. */
-                if (sz == 8 && base_size == 8 && ins->op == midgard_alu_op_imov) {
-                        ins->outmod = midgard_outmod_keeplo;
-                        base_size = 16;
-                }
-
                 midgard_src_expand_mode expand_mode = midgard_src_passthrough;
                 unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i],
                                                     sz, base_size, channeled,
index 5ff9469..a4ea28f 100644 (file)
@@ -232,20 +232,16 @@ mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask)
         ins->mask = mir_from_bytemask(bytemask, type_size);
 }
 
-/* Checks if we should use an upper destination override, rather than the lower
- * one in the IR. Returns zero if no, returns the bytes to shift otherwise */
-
+/*
+ * Checks if we should use an upper destination override, rather than the lower
+ * one in the IR. If yes, returns the bytes to shift by. If no, returns zero
+ * for a lower override and negative for no override.
+ */
 signed
 mir_upper_override(midgard_instruction *ins, unsigned inst_size)
 {
         unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
 
-        /* 8bit imovs are promoted to 16bit ones with .sext on the source and
-         * .keeplo on the destination to accomodate with non-identity swizzles.
-         */
-        if (ins->op == midgard_alu_op_imov && type_size == 8)
-                return 0;
-
         /* If the sizes are the same, there's nothing to override */
         if (type_size == inst_size)
                 return -1;