ac/nir: clear unused components before storing XFB outputs to LDS
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 16 Jan 2023 09:13:12 +0000 (10:13 +0100)
committer Marge Bot <emma+marge@anholt.net>
Wed, 18 Jan 2023 07:38:44 +0000 (07:38 +0000)
Shader variables don't always exactly match intrinsics and they might
contain unused slots.

Fixes a bunch of regressions with RADV_PERFTEST=ngg_streamout on RDNA2,
and also fixes RDNA3 NGG streamout.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8099
Fixes: cd22bf90e79 ("ac/nir/ngg: refine nogs outputs handling")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20735>

src/amd/common/ac_nir_lower_ngg.c

index 5fe8c42..4a0f3bc 100644 (file)
@@ -1684,6 +1684,13 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
          util_bitcount64(b->shader->info.outputs_written & BITFIELD64_MASK(slot));
 
       unsigned mask = xfb_mask[slot];
+
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs[slot][i])
+            mask &= ~BITFIELD_BIT(i);
+      }
+
       while (mask) {
          int start, count;
          u_bit_scan_consecutive_range(&mask, &start, &count);
@@ -1706,6 +1713,14 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
       unsigned mask_lo = xfb_mask_16bit_lo[slot];
       unsigned mask_hi = xfb_mask_16bit_hi[slot];
 
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs_16bit_lo[slot][i])
+            mask_lo &= ~BITFIELD_BIT(i);
+         if (!s->outputs_16bit_hi[slot][i])
+            mask_hi &= ~BITFIELD_BIT(i);
+      }
+
       nir_ssa_def **outputs_lo = s->outputs_16bit_lo[slot];
       nir_ssa_def **outputs_hi = s->outputs_16bit_hi[slot];
       nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);