radeonsi: only mask 1 CU for GS/VS waves on gfx10.3
author: Marek Olšák <marek.olsak@amd.com>
Tue, 24 Nov 2020 22:17:42 +0000 (17:17 -0500)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 1 Dec 2020 20:33:03 +0000 (15:33 -0500)
ported from PAL

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7721>

src/gallium/drivers/radeonsi/si_state.c

index 1112399..ca87c67 100644 (file)
@@ -5175,10 +5175,18 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
          } else {
             late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
 
-            /* CU2 & CU3 disabled because of the dual CU design */
+            /* Gfx10: CU2 & CU3 must be disabled to prevent a hw deadlock.
+             * Others: CU1 must be disabled to prevent a hw deadlock.
+             *
+             * The deadlock is caused by late alloc, which usually increases
+             * performance.
+             */
+            cu_mask_vs &= sctx->chip_class == GFX10 ? ~BITFIELD_RANGE(2, 2) :
+                                                      ~BITFIELD_RANGE(1, 1);
+
             /* Late alloc is not used for NGG on Navi14 due to a hw bug. */
-            cu_mask_vs = 0xfff3;
-            cu_mask_gs = sscreen->use_ngg && sctx->family != CHIP_NAVI14 ? 0xfff3 : 0xffff;
+            if (sscreen->use_ngg && sctx->family != CHIP_NAVI14)
+               cu_mask_gs = cu_mask_vs;
          }
       } else {
          if (!sscreen->info.use_late_alloc) {