From: Alyssa Rosenzweig <alyssa@collabora.com>
Date: Fri, 28 Oct 2022 01:28:34 +0000 (-0400)
Subject: pan/mdg: Emulate 8-bit with the 16-bit pipe
X-Git-Tag: upstream/22.3.5~302
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ba9ee546d587a1100e5d1d4aecb68ef9543aef9c;p=platform%2Fupstream%2Fmesa.git

pan/mdg: Emulate 8-bit with the 16-bit pipe

We don't care to support i8vec16; we just need a bit of 8-bit support to
implement format packing/unpacking in blend shaders. We're already doing
this by using the 16-bit pipe; we just need to commit to it all the way
-- reporting the correct sizes in max_bitsize_for_alu so the mask
packing logic works as intended -- and dropping the imov-specific hack
that was introduced to work around a similar class of bugs.

With the previous patch, fixes:

   dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.1

Fixes: 39e4b7279dc ("pan/midg: Fix swizzling on 8-bit sources")
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19763>
(cherry picked from commit 976405907e35629b42501a9f86b067986599cb28)
---

diff --git a/.pick_status.json b/.pick_status.json
index 73ebab1..8454a12 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -5872,7 +5872,7 @@
         "description": "pan/mdg: Emulate 8-bit with the 16-bit pipe",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "39e4b7279dcdcef91a0e829a1938b2816aa4ce75"
     },
diff --git a/src/panfrost/ci/panfrost-t860-fails.txt b/src/panfrost/ci/panfrost-t860-fails.txt
index 8190d96..9c1d19c 100644
--- a/src/panfrost/ci/panfrost-t860-fails.txt
+++ b/src/panfrost/ci/panfrost-t860-fails.txt
@@ -4,11 +4,9 @@ dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffer
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.3,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.4,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.5,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.7,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.8,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.9,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.10,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.11,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.12,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.13,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.14,Fail
@@ -17,7 +15,6 @@ dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffer
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.17,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.18,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.19,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.1,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.2,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.10,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.11,Fail
@@ -25,7 +22,6 @@ dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.12,F
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.15,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.16,Fail
 dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.17,Fail
-dEQP-GLES3.functional.draw_buffers_indexed.random.max_required_draw_buffers.18,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_x,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_y,Fail
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 940a98b..69d88eb 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -2650,6 +2650,9 @@ max_bitsize_for_alu(midgard_instruction *ins)
         unsigned dst_bitsize = nir_alu_type_get_type_size(ins->dest_type);
         max_bitsize = MAX2(dst_bitsize, max_bitsize);
 
+        /* We emulate 8-bit as 16-bit for simplicity of packing */
+        max_bitsize = MAX2(max_bitsize, 16);
+
         /* We don't have fp16 LUTs, so we'll want to emit code like:
          *
          *      vlut.fsinr hr0, hr0
diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c
index 52dd4a8..45b23db 100644
--- a/src/panfrost/midgard/midgard_emit.c
+++ b/src/panfrost/midgard/midgard_emit.c
@@ -340,12 +340,6 @@ mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu)
                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
                 assert((sz == base_size) || (sz == base_size / 2));
 
-                /* Promote 8bit moves to 16bit ones so we can support any swizzles. */
-                if (sz == 8 && base_size == 8 && ins->op == midgard_alu_op_imov) {
-                        ins->outmod = midgard_outmod_keeplo;
-                        base_size = 16;
-                }
-
                 midgard_src_expand_mode expand_mode = midgard_src_passthrough;
                 unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i],
                                                     sz, base_size, channeled,
diff --git a/src/panfrost/midgard/mir.c b/src/panfrost/midgard/mir.c
index 5ff9469..a4ea28f 100644
--- a/src/panfrost/midgard/mir.c
+++ b/src/panfrost/midgard/mir.c
@@ -232,20 +232,16 @@ mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask)
         ins->mask = mir_from_bytemask(bytemask, type_size);
 }
 
-/* Checks if we should use an upper destination override, rather than the lower
- * one in the IR. Returns zero if no, returns the bytes to shift otherwise */
-
+/*
+ * Checks if we should use an upper destination override, rather than the lower
+ * one in the IR. If yes, returns the bytes to shift by. If no, returns zero
+ * for a lower override and negative for no override.
+ */
 signed
 mir_upper_override(midgard_instruction *ins, unsigned inst_size)
 {
         unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
 
-        /* 8bit imovs are promoted to 16bit ones with .sext on the source and
-         * .keeplo on the destination to accomodate with non-identity swizzles.
-         */
-        if (ins->op == midgard_alu_op_imov && type_size == 8)
-                return 0;
-
         /* If the sizes are the same, there's nothing to override */
         if (type_size == inst_size)
                 return -1;