radeonsi: Put retile map in separate buffers.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 30 Jul 2020 15:14:38 +0000 (17:14 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Sat, 19 Sep 2020 07:15:25 +0000 (03:15 -0400)
The retile maps are a software mechanism and hence very susceptible
to change. As such I'd like to avoid making it part of the cross
driver ABI.

Ideally we'd just use the cached tile info + a shader to avoid these
buffers altogether.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6783>

src/amd/common/ac_surface.c
src/amd/common/ac_surface.h
src/gallium/drivers/radeonsi/si_compute_blit.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_texture.c

index 484db1d..df5b35b 100644 (file)
@@ -1642,34 +1642,32 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
             /* Align the size to 4 (for the compute shader). */
             surf->u.gfx9.dcc_retile_num_elements = align(surf->u.gfx9.dcc_retile_num_elements, 4);
 
-            if (!(surf->flags & RADEON_SURF_IMPORTED)) {
-               /* Compute address mapping from non-displayable to displayable DCC. */
-               ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
-               memset(&addrin, 0, sizeof(addrin));
-               addrin.size = sizeof(addrin);
-               addrin.swizzleMode = din.swizzleMode;
-               addrin.resourceType = din.resourceType;
-               addrin.bpp = din.bpp;
-               addrin.numSlices = 1;
-               addrin.numMipLevels = 1;
-               addrin.numFrags = 1;
-               addrin.pitch = dout.pitch;
-               addrin.height = dout.height;
-               addrin.compressBlkWidth = dout.compressBlkWidth;
-               addrin.compressBlkHeight = dout.compressBlkHeight;
-               addrin.compressBlkDepth = dout.compressBlkDepth;
-               addrin.metaBlkWidth = dout.metaBlkWidth;
-               addrin.metaBlkHeight = dout.metaBlkHeight;
-               addrin.metaBlkDepth = dout.metaBlkDepth;
-               addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */
-
-               surf->u.gfx9.dcc_retile_map = ac_compute_dcc_retile_map(
-                  addrlib, info, retile_dim[0], retile_dim[1], surf->u.gfx9.dcc.rb_aligned,
-                  surf->u.gfx9.dcc.pipe_aligned, surf->u.gfx9.dcc_retile_use_uint16,
-                  surf->u.gfx9.dcc_retile_num_elements, &addrin);
-               if (!surf->u.gfx9.dcc_retile_map)
-                  return ADDR_OUTOFMEMORY;
-            }
+            /* Compute address mapping from non-displayable to displayable DCC. */
+            ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
+            memset(&addrin, 0, sizeof(addrin));
+            addrin.size = sizeof(addrin);
+            addrin.swizzleMode = din.swizzleMode;
+            addrin.resourceType = din.resourceType;
+            addrin.bpp = din.bpp;
+            addrin.numSlices = 1;
+            addrin.numMipLevels = 1;
+            addrin.numFrags = 1;
+            addrin.pitch = dout.pitch;
+            addrin.height = dout.height;
+            addrin.compressBlkWidth = dout.compressBlkWidth;
+            addrin.compressBlkHeight = dout.compressBlkHeight;
+            addrin.compressBlkDepth = dout.compressBlkDepth;
+            addrin.metaBlkWidth = dout.metaBlkWidth;
+            addrin.metaBlkHeight = dout.metaBlkHeight;
+            addrin.metaBlkDepth = dout.metaBlkDepth;
+            addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */
+
+            surf->u.gfx9.dcc_retile_map = ac_compute_dcc_retile_map(
+               addrlib, info, retile_dim[0], retile_dim[1], surf->u.gfx9.dcc.rb_aligned,
+               surf->u.gfx9.dcc.pipe_aligned, surf->u.gfx9.dcc_retile_use_uint16,
+               surf->u.gfx9.dcc_retile_num_elements, &addrin);
+            if (!surf->u.gfx9.dcc_retile_map)
+               return ADDR_OUTOFMEMORY;
          }
       }
 
@@ -2102,6 +2100,11 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf
    surf->total_size = surf->surf_size;
    surf->alignment = surf->surf_alignment;
 
+   /* Ensure the offsets are always 0 if not available. */
+   surf->dcc_offset = surf->display_dcc_offset = 0;
+   surf->fmask_offset = surf->cmask_offset = 0;
+   surf->htile_offset = 0;
+
    if (surf->htile_size) {
       surf->htile_offset = align64(surf->total_size, surf->htile_alignment);
       surf->total_size = surf->htile_offset + surf->htile_size;
@@ -2135,17 +2138,6 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf
          /* Add space for the displayable DCC buffer. */
          surf->display_dcc_offset = align64(surf->total_size, surf->u.gfx9.display_dcc_alignment);
          surf->total_size = surf->display_dcc_offset + surf->u.gfx9.display_dcc_size;
-
-         /* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
-         surf->dcc_retile_map_offset = align64(surf->total_size, info->tcc_cache_line_size);
-
-         if (surf->u.gfx9.dcc_retile_use_uint16) {
-            surf->total_size =
-               surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 2;
-         } else {
-            surf->total_size =
-               surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 4;
-         }
       }
 
       surf->dcc_offset = align64(surf->total_size, surf->dcc_alignment);
@@ -2161,7 +2153,6 @@ void ac_surface_zero_dcc_fields(struct radeon_surf *surf)
 {
    surf->dcc_offset = 0;
    surf->display_dcc_offset = 0;
-   surf->dcc_retile_map_offset = 0;
 }
 
 static unsigned eg_tile_split(unsigned tile_split)
@@ -2493,6 +2484,4 @@ void ac_surface_override_offset_stride(const struct radeon_info *info, struct ra
       surf->dcc_offset += offset;
    if (surf->display_dcc_offset)
       surf->display_dcc_offset += offset;
-   if (surf->dcc_retile_map_offset)
-      surf->dcc_retile_map_offset += offset;
 }
index ec6d044..eafe114 100644 (file)
@@ -249,7 +249,6 @@ struct radeon_surf {
    uint64_t cmask_offset;
    uint64_t dcc_offset;
    uint64_t display_dcc_offset;
-   uint64_t dcc_retile_map_offset;
    uint64_t total_size;
    uint32_t alignment;
 
index 958fa6e..a66968c 100644 (file)
@@ -626,18 +626,18 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
    unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
    struct pipe_image_view img[3];
 
-   assert(tex->surface.dcc_retile_map_offset && tex->surface.dcc_retile_map_offset <= UINT_MAX);
+   assert(tex->dcc_retile_buffer);
    assert(tex->surface.dcc_offset && tex->surface.dcc_offset <= UINT_MAX);
    assert(tex->surface.display_dcc_offset && tex->surface.display_dcc_offset <= UINT_MAX);
 
    for (unsigned i = 0; i < 3; i++) {
-      img[i].resource = &tex->buffer.b.b;
+      img[i].resource = i == 0 ? &tex->dcc_retile_buffer->b.b : &tex->buffer.b.b;
       img[i].access = i == 2 ? PIPE_IMAGE_ACCESS_WRITE : PIPE_IMAGE_ACCESS_READ;
       img[i].shader_access = SI_IMAGE_ACCESS_AS_BUFFER;
    }
 
    img[0].format = use_uint16 ? PIPE_FORMAT_R16G16B16A16_UINT : PIPE_FORMAT_R32G32B32A32_UINT;
-   img[0].u.buf.offset = tex->surface.dcc_retile_map_offset;
+   img[0].u.buf.offset = 0;
    img[0].u.buf.size = num_elements * (use_uint16 ? 2 : 4);
 
    img[1].format = PIPE_FORMAT_R8_UINT;
index 355033c..faa87ab 100644 (file)
@@ -384,6 +384,8 @@ struct si_texture {
    unsigned ps_draw_ratio;
    /* The number of clears since the last DCC usage analysis. */
    unsigned num_slow_clears;
+
+   struct si_resource *dcc_retile_buffer;
 };
 
 struct si_surface {
index 0143911..448e749 100644 (file)
@@ -534,6 +534,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex
    tex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
    si_resource_reference(&tex->dcc_separate_buffer, new_tex->dcc_separate_buffer);
    si_resource_reference(&tex->last_dcc_separate_buffer, new_tex->last_dcc_separate_buffer);
+   si_resource_reference(&tex->dcc_retile_buffer, new_tex->dcc_retile_buffer);
 
    if (new_bind_flag == PIPE_BIND_LINEAR) {
       assert(!tex->surface.htile_offset);
@@ -813,6 +814,7 @@ static void si_texture_destroy(struct pipe_screen *screen, struct pipe_resource
    pb_reference(&resource->buf, NULL);
    si_resource_reference(&tex->dcc_separate_buffer, NULL);
    si_resource_reference(&tex->last_dcc_separate_buffer, NULL);
+   si_resource_reference(&tex->dcc_retile_buffer, NULL);
    FREE(tex);
 }
 
@@ -1145,41 +1147,44 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
             }
          }
       }
+   }
 
-      /* Initialize displayable DCC that requires the retile blit. */
-      if (tex->surface.dcc_retile_map_offset) {
-         /* Uninitialized DCC can hang the display hw.
-          * Clear to white to indicate that. */
-         si_screen_clear_buffer(sscreen, &tex->buffer.b.b, tex->surface.display_dcc_offset,
-                                tex->surface.u.gfx9.display_dcc_size, DCC_CLEAR_COLOR_1111);
+   /* Initialize displayable DCC that requires the retile blit. */
+   if (tex->surface.display_dcc_offset) {
+      /* Uninitialized DCC can hang the display hw.
+       * Clear to white to indicate that. */
+      si_screen_clear_buffer(sscreen, &tex->buffer.b.b, tex->surface.display_dcc_offset,
+                             tex->surface.u.gfx9.display_dcc_size, DCC_CLEAR_COLOR_1111);
 
-         /* Upload the DCC retile map.
-          * Use a staging buffer for the upload, because
-          * the buffer backing the texture is unmappable.
-          */
-         bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
-         unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
-         unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4);
-         struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM,
-                                                            dcc_retile_map_size,
-                                                            sscreen->info.tcc_cache_line_size);
-         void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE);
-
-         /* Upload the retile map into the staging buffer. */
-         memcpy(map, tex->surface.u.gfx9.dcc_retile_map, dcc_retile_map_size);
-
-         /* Copy the staging buffer to the buffer backing the texture. */
-         struct si_context *sctx = (struct si_context *)sscreen->aux_context;
-
-         assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
-         simple_mtx_lock(&sscreen->aux_context_lock);
-         si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b, tex->surface.dcc_retile_map_offset,
-                             0, buf->b.b.width0);
-         sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
-         simple_mtx_unlock(&sscreen->aux_context_lock);
-
-         si_resource_reference(&buf, NULL);
-      }
+      /* Upload the DCC retile map.
+       * Use a staging buffer for the upload, because
+       * the buffer backing the texture is unmappable.
+       */
+      bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
+      unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
+      unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4);
+
+      tex->dcc_retile_buffer = si_aligned_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
+                                                        dcc_retile_map_size,
+                                                        sscreen->info.tcc_cache_line_size);
+      struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM,
+                                                         dcc_retile_map_size,
+                                                         sscreen->info.tcc_cache_line_size);
+      void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE);
+
+      /* Upload the retile map into the staging buffer. */
+      memcpy(map, tex->surface.u.gfx9.dcc_retile_map, dcc_retile_map_size);
+
+      /* Copy the staging buffer to the separate DCC retile buffer. */
+      struct si_context *sctx = (struct si_context *)sscreen->aux_context;
+
+      simple_mtx_lock(&sscreen->aux_context_lock);
+      si_sdma_copy_buffer(sctx, &tex->dcc_retile_buffer->b.b, &buf->b.b, 0,
+                          0, buf->b.b.width0);
+      sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
+      simple_mtx_unlock(&sscreen->aux_context_lock);
+
+      si_resource_reference(&buf, NULL);
    }
 
    /* Initialize the CMASK base register value. */