r600: Elide downloads for discarded and immutable compute memories.
authorJason Volk <jason@zemos.net>
Fri, 22 Apr 2022 01:12:53 +0000 (18:12 -0700)
committerMarge Bot <emma+marge@anholt.net>
Thu, 12 May 2022 08:25:52 +0000 (08:25 +0000)
Compute memory item demotion invokes a device-to-host transfer unconditionally,
but there are at least two cases where this is not necessary:

1. The item is mapped for discarding with PIPE_MAP_DISCARD_RANGE (e.g.
CL_MAP_WRITE_INVALIDATE_REGION).

2. The item cannot be written to by the device.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16116>

src/gallium/drivers/r600/compute_memory_pool.c
src/gallium/drivers/r600/evergreen_compute.c

index 0157b05..2e4b26c 100644 (file)
@@ -430,12 +430,18 @@ void compute_memory_demote_item(struct compute_memory_pool *pool,
        dst = (struct pipe_resource *)item->real_buffer;
 
        /* We transfer the memory from the item in the pool to the
-        * temporary buffer */
-       u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);
+        * temporary buffer. Download is skipped for items:
+        * - Not mapped for reading or writing (PIPE_MAP_DISCARD_RANGE).
+        * - Not writable by the device. */
+       if ((item->status & (ITEM_MAPPED_FOR_READING|ITEM_MAPPED_FOR_WRITING)) &&
+               !(r600_resource(dst)->flags & RADEON_FLAG_READ_ONLY)) {
+
+               u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);
 
-       rctx->b.b.resource_copy_region(pipe,
-               dst, 0, 0, 0, 0,
-               src, 0, &box);
+               rctx->b.b.resource_copy_region(pipe,
+                       dst, 0, 0, 0, 0,
+                       src, 0, &box);
+       }
 
        /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
        item->start_in_dw = -1;
index 4d10e2b..18a50b5 100644 (file)
@@ -1247,6 +1247,12 @@ void *r600_compute_global_transfer_map(struct pipe_context *ctx,
        struct pipe_resource *dst = NULL;
        unsigned offset = box->x;
 
+       if (usage & PIPE_MAP_READ)
+               buffer->chunk->status |= ITEM_MAPPED_FOR_READING;
+
+       if (usage & PIPE_MAP_WRITE)
+               buffer->chunk->status |= ITEM_MAPPED_FOR_WRITING;
+
        if (is_item_in_pool(item)) {
                compute_memory_demote_item(pool, item, ctx);
        }
@@ -1259,9 +1265,6 @@ void *r600_compute_global_transfer_map(struct pipe_context *ctx,
 
        dst = (struct pipe_resource*)item->real_buffer;
 
-       if (usage & PIPE_MAP_READ)
-               buffer->chunk->status |= ITEM_MAPPED_FOR_READING;
-
        COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
                        "level = %u, usage = %u, box(x = %u, y = %u, z = %u "
                        "width = %u, height = %u, depth = %u)\n", level, usage,
@@ -1282,7 +1285,7 @@ void *r600_compute_global_transfer_map(struct pipe_context *ctx,
 
        ///TODO: do it better, mapping is not possible if the pool is too big
        return pipe_buffer_map_range(ctx, dst,
-                       offset, box->width, usage, ptransfer);
+                       offset, box->width, usage & ~PIPE_MAP_READ, ptransfer);
 }
 
 void r600_compute_global_transfer_unmap(struct pipe_context *ctx,