radeonsi: fix and enable full DCC with MSAA 2x on gfx9
author: Marek Olšák <marek.olsak@amd.com>
Fri, 19 Mar 2021 20:14:23 +0000 (16:14 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 13 Apr 2021 03:17:42 +0000 (03:17 +0000)
This enables fast clear with any clear color (not just 0/1) for bpp >= 32.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10003>

src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_clear.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_texture.c

index 75eda35..6f79adc 100644 (file)
@@ -488,6 +488,22 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
              custom_blend == sctx->custom_blend_dcc_decompress)
             sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
+         /* When running FMASK decompression with DCC, we need to run the "eliminate fast clear" pass
+          * separately because FMASK decompression doesn't eliminate DCC fast clear. This makes
+          * render->texture transitions more expensive. It can be disabled by
+          * allow_dcc_msaa_clear_to_reg_for_bpp.
+          *
+          * TODO: When we get here, change the compression to TC-compatible on the next clear
+          *       to disable both the FMASK decompression and fast clear elimination passes.
+          */
+         if (sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)] &&
+             custom_blend == sctx->custom_blend_fmask_decompress &&
+             vi_dcc_enabled(tex, level)) {
+            si_blitter_begin(sctx, SI_DECOMPRESS);
+            util_blitter_custom_color(sctx->blitter, cbsurf, sctx->custom_blend_eliminate_fastclear);
+            si_blitter_end(sctx);
+         }
+
          pipe_surface_reference(&cbsurf, NULL);
       }
 
index ee69e87..adeff6f 100644 (file)
@@ -644,8 +644,8 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
          if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode))
             continue;
 
-         /* TODO: This DCC+CMASK clear doesn't work with MSAA. */
-         if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer && eliminate_needed)
+         if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed &&
+             !sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)])
             continue;
 
          assert(num_clears < ARRAY_SIZE(info));
index 9181aef..ede7560 100644 (file)
@@ -1204,6 +1204,14 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    sscreen->use_ngg_culling = sscreen->use_ngg && !(sscreen->debug_flags & DBG(NO_NGG_CULLING));
    sscreen->use_ngg_streamout = false;
 
+   /* Only set this for the cases that are known to work, which are:
+    * - GFX9 if bpp >= 4 (in bytes)
+    */
+   if (sscreen->info.chip_class == GFX9) {
+      for (unsigned bpp_log2 = util_logbase2(4); bpp_log2 <= util_logbase2(16); bpp_log2++)
+         sscreen->allow_dcc_msaa_clear_to_reg_for_bpp[bpp_log2] = true;
+   }
+
    /* Only enable primitive binning on APUs by default. */
    if (sscreen->info.chip_class >= GFX10) {
       sscreen->dpbb_allowed = true;
index b18dd62..12d8060 100644 (file)
@@ -557,6 +557,7 @@ struct si_screen {
    bool use_ngg;
    bool use_ngg_culling;
    bool use_ngg_streamout;
+   bool allow_dcc_msaa_clear_to_reg_for_bpp[5]; /* indexed by log2(Bpp) */
 
    struct {
 #define OPT_BOOL(name, dflt, description) bool name : 1;
index a99d55b..578714e 100644 (file)
@@ -233,8 +233,7 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac
 
       case GFX9:
          /* DCC clear for 4x and 8x MSAA textures unimplemented. */
-         if (ptex->nr_storage_samples >= 4 ||
-             (sscreen->info.family == CHIP_RAVEN && ptex->nr_storage_samples >= 2 && bpe < 4))
+         if (ptex->nr_storage_samples >= 4)
             flags |= RADEON_SURF_DISABLE_DCC;
          break;