pan/bi: Lower fragment output with <4 components

author Alyssa Rosenzweig <alyssa@collabora.com>

Tue, 27 Jul 2021 23:46:24 +0000 (19:46 -0400)

committer Marge Bot <eric+marge@anholt.net>

Thu, 29 Jul 2021 23:40:46 +0000 (23:40 +0000)
author Alyssa Rosenzweig <alyssa@collabora.com>
Tue, 27 Jul 2021 23:46:24 +0000 (19:46 -0400)
committer Marge Bot <eric+marge@anholt.net>
Thu, 29 Jul 2021 23:40:46 +0000 (23:40 +0000)
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c

index a386999..cc6bb87 100644 (file)
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -3179,6 +3179,62 @@ nir_invalidate_divergence(struct nir_builder *b, nir_instr *instr,
          return nir_foreach_ssa_def(instr, nir_invalidate_divergence_ssa, NULL);
  }
  
+/* Ensure we write exactly 4 components */
+static nir_ssa_def *
+bifrost_nir_valid_channel(nir_builder *b, nir_ssa_def *in,
+                          unsigned channel, unsigned first, unsigned mask)
+{
+        if (!(mask & BITFIELD_BIT(channel)))
+                channel = first;
+
+        return nir_channel(b, in, channel);
+}
+
+/* Lower fragment store_output instructions to always write 4 components,
+ * matching the hardware semantic. This may require additional moves. Skipping
+ * these moves is possible in theory, but invokes undefined behaviour in the
+ * compiler. The DDK inserts these moves, so we will as well. */
+
+static bool
+bifrost_nir_lower_blend_components(struct nir_builder *b,
+                                   nir_instr *instr, void *data)
+{
+        if (instr->type != nir_instr_type_intrinsic)
+                return false;
+
+        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+        if (intr->intrinsic != nir_intrinsic_store_output)
+                return false;
+
+        nir_ssa_def *in = intr->src[0].ssa;
+        unsigned first = nir_intrinsic_component(intr);
+        unsigned mask = nir_intrinsic_write_mask(intr);
+
+        assert(first == 0 && "shouldn't get nonzero components");
+
+        /* Nothing to do */
+        if (mask == BITFIELD_MASK(4))
+                return false;
+
+        b->cursor = nir_before_instr(&intr->instr);
+
+        /* Replicate the first valid component instead */
+        nir_ssa_def *replicated =
+                nir_vec4(b, bifrost_nir_valid_channel(b, in, 0, first, mask),
+                            bifrost_nir_valid_channel(b, in, 1, first, mask),
+                            bifrost_nir_valid_channel(b, in, 2, first, mask),
+                            bifrost_nir_valid_channel(b, in, 3, first, mask));
+
+        /* Rewrite to use our replicated version */
+        nir_instr_rewrite_src_ssa(instr, &intr->src[0], replicated);
+        nir_intrinsic_set_component(intr, 0);
+        nir_intrinsic_set_write_mask(intr, 0xF);
+        intr->num_components = 4;
+
+        return true;
+}
+
  static void
  bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
  {
@@ -3281,6 +3337,13 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
          NIR_PASS(progress, nir, bifrost_nir_lower_algebraic_late);
          NIR_PASS(progress, nir, nir_opt_dce);
  
+        if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+                NIR_PASS_V(nir, nir_shader_instructions_pass,
+                           bifrost_nir_lower_blend_components,
+                           nir_metadata_block_index | nir_metadata_dominance,
+                           NULL);
+        }
+
          /* Backend scheduler is purely local, so do some global optimizations
           * to reduce register pressure. */
          nir_move_options move_all =
author	Alyssa Rosenzweig <alyssa@collabora.com>
	Tue, 27 Jul 2021 23:46:24 +0000 (19:46 -0400)
committer	Marge Bot <eric+marge@anholt.net>
	Thu, 29 Jul 2021 23:40:46 +0000 (23:40 +0000)