radeonsi: implement Z/S fast clear for non-zero mipmap levels
author Marek Olšák <marek.olsak@amd.com>
Sat, 20 Mar 2021 23:59:06 +0000 (19:59 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 13 Apr 2021 03:17:42 +0000 (03:17 +0000)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10003>

src/gallium/drivers/radeonsi/si_clear.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_texture.c

index fd8d0d9..d29dba9 100644 (file)
@@ -714,13 +714,13 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
    }
 
    if (zstex && zsbuf->u.tex.first_layer == 0 &&
-       zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0) &&
-       /* TODO: enable fast clear for other mipmap levels */
-       zsbuf->u.tex.level == 0) {
+       zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
+      unsigned level = zsbuf->u.tex.level;
+
       /* See whether we should enable TC-compatible HTILE. */
       if (zstex->enable_tc_compatible_htile_next_clear &&
           !zstex->tc_compatible_htile &&
-          si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS) &&
+          si_htile_enabled(zstex, level, PIPE_MASK_ZS) &&
           /* If both depth and stencil are present, they must be cleared together. */
           ((buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL ||
            (buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil ||
@@ -758,22 +758,23 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
       }
 
       /* TC-compatible HTILE only supports depth clears to 0 or 1. */
-      if (buffers & PIPE_CLEAR_DEPTH && si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_Z) &&
+      if (buffers & PIPE_CLEAR_DEPTH && si_htile_enabled(zstex, level, PIPE_MASK_Z) &&
           (!zstex->tc_compatible_htile || depth == 0 || depth == 1)) {
          /* Need to disable EXPCLEAR temporarily if clearing
           * to a new value. */
-         if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
+         if (!(zstex->depth_cleared_level_mask & BITFIELD_BIT(level)) ||
+             zstex->depth_clear_value[level] != depth) {
             sctx->db_depth_disable_expclear = true;
          }
 
-         if (zstex->depth_clear_value != (float)depth) {
-            if ((zstex->depth_clear_value != 0) != (depth != 0)) {
+         if (zstex->depth_clear_value[level] != (float)depth) {
+            if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) {
                /* ZRANGE_PRECISION register of a bound surface will change so we
                 * must flush the DB caches. */
                needs_db_flush = true;
             }
             /* Update DB_DEPTH_CLEAR. */
-            zstex->depth_clear_value = depth;
+            zstex->depth_clear_value[level] = depth;
             sctx->framebuffer.dirty_zsbuf = true;
             si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
          }
@@ -783,19 +784,20 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 
       /* TC-compatible HTILE only supports stencil clears to 0. */
       if (buffers & PIPE_CLEAR_STENCIL &&
-          si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_S) &&
+          si_htile_enabled(zstex, level, PIPE_MASK_S) &&
           (!zstex->tc_compatible_htile || stencil == 0)) {
          stencil &= 0xff;
 
          /* Need to disable EXPCLEAR temporarily if clearing
           * to a new value. */
-         if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
+         if (!(zstex->stencil_cleared_level_mask & BITFIELD_BIT(level)) ||
+             zstex->stencil_clear_value[level] != stencil) {
             sctx->db_stencil_disable_expclear = true;
          }
 
-         if (zstex->stencil_clear_value != (uint8_t)stencil) {
+         if (zstex->stencil_clear_value[level] != (uint8_t)stencil) {
             /* Update DB_STENCIL_CLEAR. */
-            zstex->stencil_clear_value = stencil;
+            zstex->stencil_clear_value[level] = stencil;
             sctx->framebuffer.dirty_zsbuf = true;
             si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
          }
@@ -822,14 +824,14 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
    if (sctx->db_depth_clear) {
       sctx->db_depth_clear = false;
       sctx->db_depth_disable_expclear = false;
-      zstex->depth_cleared = true;
+      zstex->depth_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);
       si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
    }
 
    if (sctx->db_stencil_clear) {
       sctx->db_stencil_clear = false;
       sctx->db_stencil_disable_expclear = false;
-      zstex->stencil_cleared = true;
+      zstex->stencil_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);
       si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
    }
 }
index 70dbf84..b2dba9b 100644 (file)
@@ -367,17 +367,17 @@ struct si_texture {
    unsigned num_planes;
 
    /* Depth buffer compression and fast clear. */
-   float depth_clear_value;
+   float depth_clear_value[RADEON_SURF_MAX_LEVELS];
+   uint8_t stencil_clear_value[RADEON_SURF_MAX_LEVELS];
+   uint16_t depth_cleared_level_mask;   /* if it was cleared at least once */
+   uint16_t stencil_cleared_level_mask; /* if it was cleared at least once */
    uint16_t dirty_level_mask;         /* each bit says if that mipmap is compressed */
    uint16_t stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
    enum pipe_format db_render_format : 16;
-   uint8_t stencil_clear_value;
    bool fmask_is_identity : 1;
    bool tc_compatible_htile : 1;
    bool enable_tc_compatible_htile_next_clear : 1;
    bool htile_stencil_disabled : 1;
-   bool depth_cleared : 1;   /* if it was cleared at least once */
-   bool stencil_cleared : 1; /* if it was cleared at least once */
    bool upgraded_depth : 1;  /* upgraded from unorm to Z32_FLOAT */
    bool is_depth : 1;
    bool db_compatible : 1;
index ce49d66..60507f5 100644 (file)
@@ -3217,6 +3217,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
          }
       }
 
+      unsigned level = zb->base.u.tex.level;
+
       if (sctx->chip_class >= GFX10) {
          radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
          radeon_set_context_reg(cs, R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);
@@ -3224,7 +3226,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
          radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 7);
          radeon_emit(cs, S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */
          radeon_emit(cs, db_z_info |                  /* DB_Z_INFO */
-                            S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0));
+                            S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
          radeon_emit(cs, db_stencil_info);     /* DB_STENCIL_INFO */
          radeon_emit(cs, zb->db_depth_base);   /* DB_Z_READ_BASE */
          radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
@@ -3246,7 +3248,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
 
          radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);
          radeon_emit(cs, db_z_info |                                   /* DB_Z_INFO */
-                            S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0));
+                            S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
          radeon_emit(cs, db_stencil_info);                             /* DB_STENCIL_INFO */
          radeon_emit(cs, zb->db_depth_base);                           /* DB_Z_READ_BASE */
          radeon_emit(cs, S_028044_BASE_HI(zb->db_depth_base >> 32));   /* DB_Z_READ_BASE_HI */
@@ -3292,7 +3294,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
          radeon_emit(cs, zb->db_depth_info |   /* DB_DEPTH_INFO */
                      S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile));
          radeon_emit(cs, db_z_info |           /* DB_Z_INFO */
-                            S_028040_ZRANGE_PRECISION(tex->depth_clear_value != 0));
+                            S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
          radeon_emit(cs, db_stencil_info);     /* DB_STENCIL_INFO */
          radeon_emit(cs, zb->db_depth_base);   /* DB_Z_READ_BASE */
          radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
@@ -3303,8 +3305,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
       }
 
       radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
-      radeon_emit(cs, tex->stencil_clear_value);    /* R_028028_DB_STENCIL_CLEAR */
-      radeon_emit(cs, fui(tex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
+      radeon_emit(cs, tex->stencil_clear_value[level]);    /* R_028028_DB_STENCIL_CLEAR */
+      radeon_emit(cs, fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */
 
       radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
       radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
index 3d101a3..8fd2962 100644 (file)
@@ -482,14 +482,14 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex
    memcpy(tex->color_clear_value, new_tex->color_clear_value, sizeof(tex->color_clear_value));
    tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
 
-   tex->depth_clear_value = new_tex->depth_clear_value;
+   memcpy(tex->depth_clear_value, new_tex->depth_clear_value, sizeof(tex->depth_clear_value));
    tex->dirty_level_mask = new_tex->dirty_level_mask;
    tex->stencil_dirty_level_mask = new_tex->stencil_dirty_level_mask;
    tex->db_render_format = new_tex->db_render_format;
-   tex->stencil_clear_value = new_tex->stencil_clear_value;
+   memcpy(tex->stencil_clear_value, new_tex->stencil_clear_value, sizeof(tex->stencil_clear_value));
    tex->tc_compatible_htile = new_tex->tc_compatible_htile;
-   tex->depth_cleared = new_tex->depth_cleared;
-   tex->stencil_cleared = new_tex->stencil_cleared;
+   tex->depth_cleared_level_mask = new_tex->depth_cleared_level_mask;
+   tex->stencil_cleared_level_mask = new_tex->stencil_cleared_level_mask;
    tex->upgraded_depth = new_tex->upgraded_depth;
    tex->db_compatible = new_tex->db_compatible;
    tex->can_sample_z = new_tex->can_sample_z;
@@ -938,7 +938,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
    /* Use 1.0 as the default clear value to get optimal ZRANGE_PRECISION if we don't
     * get a fast clear.
     */
-   tex->depth_clear_value = 1.0;
+   for (unsigned i = 0; i < ARRAY_SIZE(tex->depth_clear_value); i++)
+      tex->depth_clear_value[i] = 1.0;
 
    /* On GFX8, HTILE uses different tiling depending on the TC_COMPATIBLE_HTILE
     * setting, so we have to enable it if we enabled it at allocation.