From 3d960491915daebcb2fe3858c5d007e649e36c05 Mon Sep 17 00:00:00 2001
From: Mike Blumenkrantz
Date: Mon, 13 Mar 2023 07:52:35 -0400
Subject: [PATCH] aux/tc: make some of the rp tracking api private
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

this enables some more under-the-hood changes without touching the
header, which would force all of gallium to be recompiled

also update/clarify the rules for using rp tracking; the rules haven't
changed, but the documentation was less clear before

Acked-by: Marek Olšák
Part-of: 
---
 src/gallium/auxiliary/util/u_threaded_context.c | 78 ++++++++++++++++---------
 src/gallium/auxiliary/util/u_threaded_context.h | 17 +++---
 src/gallium/drivers/zink/zink_context.c         | 14 ++---
 3 files changed, 66 insertions(+), 43 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index a6e1637..75802fd 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -116,6 +116,23 @@ tc_clear_driver_thread(struct threaded_context *tc)
 #endif
 }
 
+struct tc_batch_rp_info {
+   /* this is what drivers can see */
+   struct tc_renderpass_info info;
+   /* determines whether the info can be "safely" read by drivers or if it may still be in use */
+   struct util_queue_fence ready;
+   /* when a batch is full, the rp info rolls over onto 'next' */
+   struct tc_batch_rp_info *next;
+   /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */
+   struct tc_batch_rp_info *prev;
+};
+
+static struct tc_batch_rp_info *
+tc_batch_rp_info(struct tc_renderpass_info *info)
+{
+   return (struct tc_batch_rp_info *)info;
+}
+
 /* ensure the batch's array of renderpass data is large enough for the current index */
 static void
 tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch)
@@ -123,29 +140,29 @@ tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *b
    unsigned size = batch->renderpass_infos.capacity;
    unsigned cur_num = MAX2(batch->renderpass_info_idx, 0);
 
-   if (size / sizeof(struct tc_renderpass_info) > cur_num)
+   if (size / sizeof(struct tc_batch_rp_info) > cur_num)
       return;
 
-   struct tc_renderpass_info *infos = batch->renderpass_infos.data;
+   struct tc_batch_rp_info *infos = batch->renderpass_infos.data;
    unsigned old_idx = batch->renderpass_info_idx - 1;
    bool redo = tc->renderpass_info_recording &&
-               tc->renderpass_info_recording == &infos[old_idx];
+               tc->renderpass_info_recording == &infos[old_idx].info;
-   if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_renderpass_info, cur_num + 10))
+   if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10))
       mesa_loge("tc: memory alloc fail!");
 
    if (size != batch->renderpass_infos.capacity) {
       /* zero new allocation region */
       uint8_t *data = batch->renderpass_infos.data;
       memset(data + size, 0, batch->renderpass_infos.capacity - size);
-      unsigned start = size / sizeof(struct tc_renderpass_info);
+      unsigned start = size / sizeof(struct tc_batch_rp_info);
       unsigned count = (batch->renderpass_infos.capacity - size) /
-                       sizeof(struct tc_renderpass_info);
+                       sizeof(struct tc_batch_rp_info);
       infos = batch->renderpass_infos.data;
       for (unsigned i = 0; i < count; i++)
         util_queue_fence_init(&infos[start + i].ready);
 
       /* re-set current recording info on resize */
       if (redo)
-         tc->renderpass_info_recording = &infos[old_idx];
+         tc->renderpass_info_recording = &infos[old_idx].info;
    }
 }
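The tc_batch_rp_info() cast introduced above is only valid because 'info'
is the first member of struct tc_batch_rp_info: C guarantees that a pointer
to a struct's first member has the same address as the struct itself, so the
public pointer handed to drivers can be cast back to the private wrapper.
A minimal standalone sketch of the idiom (all names below are invented for
illustration, not part of the patch):

   #include <assert.h>
   #include <stddef.h>

   struct public_info { int data; };

   struct private_wrapper {
      struct public_info info; /* must remain the first member */
      int private_state;       /* hidden from consumers of public_info */
   };

   /* same idiom as tc_batch_rp_info(): recover the wrapper from the
    * public pointer */
   static struct private_wrapper *
   wrapper_from_info(struct public_info *p)
   {
      return (struct private_wrapper *)p;
   }

   int main(void)
   {
      struct private_wrapper w = { .info = { .data = 42 }, .private_state = 7 };
      /* offsetof(first member) == 0, so the round-trip is well-defined */
      assert(offsetof(struct private_wrapper, info) == 0);
      assert(wrapper_from_info(&w.info) == &w);
      return 0;
   }

Keeping the fence and rollover pointers in the wrapper keeps the public
struct and header stable while tc remains free to grow its private state.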
@@ -154,8 +171,8 @@ static void
 tc_signal_renderpass_info_ready(struct threaded_context *tc)
 {
    if (tc->renderpass_info_recording &&
-       !util_queue_fence_is_signalled(&tc->renderpass_info_recording->ready))
-      util_queue_fence_signal(&tc->renderpass_info_recording->ready);
+       !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready))
+      util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready);
 }
 
 /* increment the current renderpass info struct for recording
@@ -165,7 +182,7 @@ static void
 tc_batch_increment_renderpass_info(struct threaded_context *tc, bool full_copy)
 {
    struct tc_batch *batch = &tc->batch_slots[tc->next];
-   struct tc_renderpass_info *tc_info = batch->renderpass_infos.data;
+   struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data;
 
    /* signal existing info since it will not be used anymore */
    tc_signal_renderpass_info_ready(tc);
@@ -176,21 +193,23 @@ tc_batch_increment_renderpass_info(struct threaded_context *tc, bool full_copy)
 
    if (full_copy) {
       /* copy the previous data in its entirety: this is still the same renderpass */
-      if (tc->renderpass_info_recording)
-         tc_info[batch->renderpass_info_idx].data = tc->renderpass_info_recording->data;
-      else
-         tc_info[batch->renderpass_info_idx].data = 0;
+      if (tc->renderpass_info_recording) {
+         tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data;
+      } else {
+         tc_info[batch->renderpass_info_idx].info.data = 0;
+      }
    } else {
       /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
-      tc_info[batch->renderpass_info_idx].data = 0;
-      if (tc->renderpass_info_recording)
-         tc_info[batch->renderpass_info_idx].data16[2] = tc->renderpass_info_recording->data16[2];
+      tc_info[batch->renderpass_info_idx].info.data = 0;
+      if (tc->renderpass_info_recording) {
+         tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2];
+      }
    }
 
    util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
-   assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx]);
+   assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info);
    /* this is now the current recording renderpass info */
-   tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx];
+   tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info;
 }
 
 static ALWAYS_INLINE struct tc_renderpass_info *
@@ -300,6 +319,13 @@ tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
    offsetof(struct pipe_draw_info, min_index)
 
+ALWAYS_INLINE static struct tc_renderpass_info *
+incr_rp_info(struct tc_renderpass_info *tc_info)
+{
+   struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info);
+   return &info[1].info;
+}
+
 ALWAYS_INLINE static void
 batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
 {
@@ -323,7 +349,7 @@ batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last,
       if (parsing) {
          if (call->call_id == TC_CALL_flush) {
             /* always increment renderpass info for non-deferred flushes */
-            batch->tc->renderpass_info++;
+            batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
             /* if a flush happens, renderpass info is always incremented after */
             first = false;
          } else if (call->call_id == TC_CALL_set_framebuffer_state) {
@@ -331,7 +357,7 @@ batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last,
            /* the renderpass info pointer is already set at the start of the batch,
             * so don't increment on the first set_framebuffer_state call */
            if (!first)
-               batch->tc->renderpass_info++;
+               batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info);
            first = false;
         } else if (call->call_id >= TC_CALL_draw_single &&
                    call->call_id <= TC_CALL_draw_vstate_multi) {
@@ -5089,9 +5115,9 @@ threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned d
 }
 
 const struct tc_renderpass_info *
-threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait)
+threaded_context_get_renderpass_info(struct threaded_context *tc)
 {
-   if (tc->renderpass_info && wait)
-      util_queue_fence_wait(&tc->renderpass_info->ready);
-   return tc->renderpass_info;
-}
\ No newline at end of file
+   struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info);
+   util_queue_fence_wait(&info->ready);
+   return &info->info;
+}
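With the 'ready' fence moved out of the public struct, the synchronization
protocol is unchanged but now entirely tc-internal: the fence is initialized
(signalled) when the infos array grows, reset when an info begins recording,
signalled once tc stops writing it, and waited on by the getter before
drivers may read. A sketch of that lifecycle using Mesa's util_queue_fence
API (only buildable inside the Mesa tree; the example struct and function
names are invented):

   #include <stdint.h>
   #include "util/u_queue.h"

   struct rp_info_example {
      uint64_t data;                 /* stand-in for struct tc_renderpass_info */
      struct util_queue_fence ready; /* the now-private fence */
   };

   static void
   producer_side(struct rp_info_example *info)
   {
      util_queue_fence_init(&info->ready);   /* on allocation: starts signalled */
      util_queue_fence_reset(&info->ready);  /* recording begins: block readers */
      info->data = 0xdeadbeef;               /* tc fills in renderpass data */
      util_queue_fence_signal(&info->ready); /* recording done: unblock readers */
   }

   static uint64_t
   consumer_side(struct rp_info_example *info)
   {
      /* what threaded_context_get_renderpass_info() now always does */
      util_queue_fence_wait(&info->ready);
      return info->data;
   }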
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index f3b31e0..30913b6 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -472,8 +472,6 @@ struct tc_renderpass_info {
       /* zsbuf fb info is in data8[3] */
       uint8_t data8[8];
    };
-   /* determines whether the info can be "safely" read by drivers or if it may still be in use */
-   struct util_queue_fence ready;
 };
 
 static inline bool
@@ -660,17 +658,18 @@ struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe);
 void
 tc_driver_internal_flush_notify(struct threaded_context *tc);
 /** function for getting the current renderpass info:
- * - renderpass info is always valid
- * - set 'wait=true' when calling during normal execution
- * - set 'wait=false' when calling from flush
+ * - renderpass info is always non-null
  *
  * Rules:
- * 1) this must be called with 'wait=true' after the driver receives a pipe_context::set_framebuffer_state callback
- * 2) this should be called with 'wait=false' when the driver receives a blocking pipe_context::flush call
- * 3) this must not be used during any internal driver operations (e.g., u_blitter)
+ * - threaded context must have been created with parse_renderpass_info=true
+ * - must be called after the driver receives a pipe_context::set_framebuffer_state callback
+ * - must be called after the driver receives a non-deferrable pipe_context::flush callback
+ * - renderpass info must not be used during any internal driver operations (e.g., u_blitter)
+ * - must not be called before the driver receives its first pipe_context::set_framebuffer_state callback
+ * - renderpass info is invalidated only for non-deferrable flushes and new framebuffer states
 */
 const struct tc_renderpass_info *
-threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait);
+threaded_context_get_renderpass_info(struct threaded_context *tc);
 
 struct pipe_context *
 threaded_context_create(struct pipe_context *pipe,
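Taken together, the rules above mean a driver only ever queries renderpass
info from well-defined points. A hypothetical driver-side sketch (the drv_*
names and struct are invented; a real driver must also have passed
parse_renderpass_info=true in threaded_context_options at creation time):

   #include <stdint.h>
   #include "util/u_threaded_context.h"

   struct drv_context {
      struct threaded_context *tc;
      uint64_t last_rp_data;
   };

   /* called from the driver thread once a pipe_context::set_framebuffer_state
    * callback has been received, per the rules above */
   static void
   drv_update_rp_info(struct drv_context *drv)
   {
      /* blocks until tc has finished recording the current info */
      const struct tc_renderpass_info *info =
         threaded_context_get_renderpass_info(drv->tc);
      drv->last_rp_data = info->data;
   }

zink's update_tc_info() in the next file is the in-tree caller that follows
this pattern.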
diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c
index cdf9c88..69accf6 100644
--- a/src/gallium/drivers/zink/zink_context.c
+++ b/src/gallium/drivers/zink/zink_context.c
@@ -61,14 +61,12 @@
 #include "util/xxhash.h"
 
 static void
-update_tc_info(struct zink_context *ctx, bool wait)
+update_tc_info(struct zink_context *ctx)
 {
-   if (ctx->tc) {
-      const struct tc_renderpass_info *info = threaded_context_get_renderpass_info(ctx->tc, wait);
-      if (info) {
-         ctx->rp_changed |= ctx->dynamic_fb.tc_info.data != info->data;
-         ctx->dynamic_fb.tc_info.data = info->data;
-      }
+   if (ctx->tc && zink_screen(ctx->base.screen)->driver_workarounds.track_renderpasses) {
+      const struct tc_renderpass_info *info = threaded_context_get_renderpass_info(ctx->tc);
+      ctx->rp_changed |= ctx->dynamic_fb.tc_info.data != info->data;
+      ctx->dynamic_fb.tc_info.data = info->data;
    }
 }
 
@@ -2745,7 +2743,7 @@ zink_batch_rp(struct zink_context *ctx)
    }
    if (!ctx->blitting) {
       if (ctx->rp_tc_info_updated)
-         update_tc_info(ctx, true);
+         update_tc_info(ctx);
       ctx->rp_tc_info_updated = false;
    }
    bool maybe_has_query_ends = !ctx->tc || !zink_screen(ctx->base.screen)->driver_workarounds.track_renderpasses || ctx->dynamic_fb.tc_info.has_query_ends;
-- 
2.7.4
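One consequence of the new getter worth noting: unlike the removed
'if (tc->renderpass_info && wait)' check, it dereferences tc->renderpass_info
unconditionally, which is why the zink hunk above gates the call on both
ctx->tc and track_renderpasses. A driver that can run with renderpass parsing
disabled wants a guard in the same spirit (sketch only; drv_context is the
invented type from the earlier sketch):

   static const struct tc_renderpass_info *
   drv_get_rp_info_or_null(struct drv_context *drv, bool parsing_enabled)
   {
      /* mirrors the zink change: only query when tc exists and renderpass
       * tracking was enabled at context creation */
      if (!drv->tc || !parsing_enabled)
         return NULL;
      return threaded_context_get_renderpass_info(drv->tc);
   }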