From fcf594d00b1e06f6d86c5a31dac4beba3f548b34 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Wed, 29 Mar 2023 19:20:12 +0900 Subject: [PATCH] asahi: Implement valid buffer range tracking A common pattern is to allocate a vertex/etc buffer and write to it in subsets. Some games interleave this with draw calls using the buffer. This causes very expensive flushing for every draw call. Fix this by tracking which range of a buffer has been written to, and elide syncs when the range was previously uninitialized. Fixes Source engine game performance and probably helps a bunch of others. Signed-off-by: Asahi Lina Part-of: --- src/gallium/drivers/asahi/agx_batch.c | 6 ++++++ src/gallium/drivers/asahi/agx_pipe.c | 30 ++++++++++++++++++++++++++++++ src/gallium/drivers/asahi/agx_state.h | 4 ++++ 3 files changed, 40 insertions(+) diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c index aabf002..8ab4648 100644 --- a/src/gallium/drivers/asahi/agx_batch.c +++ b/src/gallium/drivers/asahi/agx_batch.c @@ -458,6 +458,12 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc) */ agx_writer_remove(ctx, rsrc->bo->handle); agx_writer_add(ctx, agx_batch_idx(batch), rsrc->bo->handle); + + if (rsrc->base.target == PIPE_BUFFER) { + /* Assume BOs written by the GPU are fully valid */ + rsrc->valid_buffer_range.start = 0; + rsrc->valid_buffer_range.end = ~0; + } } /* diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index c87dadd..18d7270 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -177,6 +177,11 @@ agx_resource_from_handle(struct pipe_screen *pscreen, /* failure is expected in some cases.. */ } + if (prsc->target == PIPE_BUFFER) { + assert(rsc->layout.tiling == AIL_TILING_LINEAR); + util_range_init(&rsc->valid_buffer_range); + } + return prsc; } @@ -440,6 +445,11 @@ agx_resource_create_with_modifiers(struct pipe_screen *screen, ail_make_miptree(&nresource->layout); + if (templ->target == PIPE_BUFFER) { + assert(nresource->layout.tiling == AIL_TILING_LINEAR); + util_range_init(&nresource->valid_buffer_range); + } + if (dev->ro && (templ->bind & PIPE_BIND_SCANOUT)) { struct winsys_handle handle; assert(util_format_get_blockwidth(templ->format) == 1); @@ -559,6 +569,9 @@ agx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *prsrc) struct agx_resource *rsrc = (struct agx_resource *)prsrc; struct agx_screen *agx_screen = (struct agx_screen *)screen; + if (prsrc->target == PIPE_BUFFER) + util_range_destroy(&rsrc->valid_buffer_range); + if (rsrc->dt) { /* display target */ struct sw_winsys *winsys = agx_screen->winsys; @@ -647,6 +660,12 @@ agx_prepare_for_map(struct agx_context *ctx, struct agx_resource *rsrc, if (!(usage & PIPE_MAP_WRITE)) return; + /* If the range being written is uninitialized, we do not need to sync. */ + if (rsrc->base.target == PIPE_BUFFER && !(rsrc->bo->flags & AGX_BO_SHARED) && + !util_ranges_intersect(&rsrc->valid_buffer_range, box->x, + box->x + box->width)) + return; + /* If there are no readers, we're done. We check at the start to * avoid expensive shadowing paths or duplicated checks in this hapyp path. */ @@ -756,6 +775,17 @@ agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource, agx_prepare_for_map(ctx, rsrc, level, usage, box); + /* Track the written buffer range */ + if (resource->target == PIPE_BUFFER) { + /* Note the ordering: DISCARD|WRITE is valid, so clear before adding. */ + if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) + util_range_set_empty(&rsrc->valid_buffer_range); + if (usage & PIPE_MAP_WRITE) { + util_range_add(resource, &rsrc->valid_buffer_range, box->x, + box->x + box->width); + } + } + struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer); transfer->base.level = level; transfer->base.usage = usage; diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 78185e5..cc2a5d8 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -23,6 +23,7 @@ #include "util/bitset.h" #include "util/disk_cache.h" #include "util/hash_table.h" +#include "util/u_range.h" #include "agx_meta.h" struct agx_streamout_target { @@ -511,6 +512,9 @@ struct agx_resource { * resources. */ struct agx_resource *separate_stencil; + + /* Valid buffer range tracking, to optimize buffer appends */ + struct util_range valid_buffer_range; }; static inline struct agx_resource * -- 2.7.4