From 41ffb15de564849668c942aa79a570b6f262de99 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 22 Sep 2022 16:42:19 -0400 Subject: [PATCH] zink: implement async gfx precompile The pipe_context::link_shader hook is called when shaders are linked into a program by the application. By leveraging this, it becomes possible to utilize the existing graphics pipeline library to implement precompilation by creating a partial pipeline containing only the shader stages and then adding in the vertex input and fragment output stages dynamically using the fast-link feature. If all goes well, and if the Vulkan driver's fast-linking is truly fast, the full pipeline should be dynamically combined in time to avoid stuttering, and an optimized variant will be queued for async compile to be used the next time the pipeline triggers a draw. Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 1 + src/gallium/drivers/zink/zink_program.c | 82 +++++++++++++++++++++++++ src/gallium/drivers/zink/zink_program.h | 2 + src/gallium/drivers/zink/zink_program_state.hpp | 7 ++- src/gallium/drivers/zink/zink_screen.c | 17 +++-- src/gallium/drivers/zink/zink_types.h | 3 + 6 files changed, 106 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 9b2fda5..5d5582e 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -3387,6 +3387,7 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader) _mesa_hash_table_remove(ht, he); prog->base.removed = true; simple_mtx_unlock(&prog->ctx->program_lock[idx]); + util_queue_fence_wait(&prog->base.cache_fence); } if (stage != MESA_SHADER_TESS_CTRL || !shader->is_generated) { prog->shaders[stage] = NULL; diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index c1a183b..ce8a405 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ 
b/src/gallium/drivers/zink/zink_program.c @@ -607,6 +607,8 @@ ALWAYS_INLINE static void update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage) { struct zink_screen *screen = zink_screen(ctx->base.screen); + if (screen->info.have_EXT_graphics_pipeline_library) + util_queue_fence_wait(&prog->base.cache_fence); struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); if (!zm) zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); @@ -675,6 +677,28 @@ zink_gfx_program_update_optimal(struct zink_context *ctx) } static void +optimized_compile_job(void *data, void *gdata, int thread_index) +{ + struct zink_gfx_pipeline_cache_entry *pc_entry = data; + struct zink_screen *screen = gdata; + VkPipeline pipeline; + if (pc_entry->gkey) + pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->ikey->pipeline, pc_entry->gkey->pipeline, pc_entry->okey->pipeline, false); + else + pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true); + if (pipeline) { + pc_entry->unoptimized_pipeline = pc_entry->pipeline; + pc_entry->pipeline = pipeline; + } +} + +void +zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry) +{ + util_queue_add_job(&zink_screen(ctx->base.screen)->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0); +} + +static void update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp) { struct zink_screen *screen = zink_screen(ctx->base.screen); @@ -1149,7 +1173,9 @@ zink_destroy_gfx_program(struct zink_screen *screen, hash_table_foreach(&prog->pipelines[r][i], entry) { struct zink_gfx_pipeline_cache_entry *pc_entry = 
entry->data; + util_queue_fence_wait(&pc_entry->fence); VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); + VKSCR(DestroyPipeline)(screen->dev, pc_entry->unoptimized_pipeline, NULL); free(pc_entry); } } @@ -1540,6 +1566,59 @@ zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *pr return gkey; } +static void +precompile_job(void *data, void *gdata, int thread_index) +{ + struct zink_screen *screen = gdata; + struct zink_gfx_program *prog = data; + + struct zink_gfx_pipeline_state state = {0}; + state.shader_keys_optimal.key.vs_base.last_vertex_stage = true; + generate_gfx_program_modules_optimal(NULL, screen, prog, &state); + zink_screen_get_pipeline_cache(screen, &prog->base, true); + zink_create_pipeline_lib(screen, prog, &state); + zink_screen_update_pipeline_cache(screen, &prog->base, true); +} + +static void +zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) +{ + struct zink_context *ctx = zink_context(pctx); + struct zink_shader **zshaders = (struct zink_shader **)shaders; + if (shaders[MESA_SHADER_COMPUTE]) + return; + /* can't precompile fixedfunc */ + if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) + return; + unsigned hash = 0; + unsigned shader_stages = 0; + for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { + if (zshaders[i]) { + hash ^= zshaders[i]->hash; + shader_stages |= BITFIELD_BIT(i); + } + } + unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL); + unsigned tess = shader_stages & tess_stages; + /* can't do fixedfunc tes either */ + if (tess && !shaders[MESA_SHADER_TESS_EVAL]) + return; + struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)]; + simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + /* link can be called repeatedly with the same shaders: ignore */ + if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) { + 
simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + return; + } + struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3); + u_foreach_bit(i, shader_stages) + assert(prog->shaders[i]); + _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); + simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); + // precompile_job(prog, ctx, 0); + util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0); +} + void zink_program_init(struct zink_context *ctx) { @@ -1585,6 +1664,9 @@ zink_program_init(struct zink_context *ctx) offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input)); STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t)); + + if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library) + ctx->base.link_shader = zink_link_gfx_shader; } bool diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index 7cabc4b..9b2848a 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -126,6 +126,8 @@ uint32_t hash_gfx_output_ds3(const void *key); uint32_t hash_gfx_input(const void *key); uint32_t hash_gfx_input_dynamic(const void *key); +void +zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry); static inline unsigned get_primtype_idx(enum pipe_prim_type mode) diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp index 3f41561..2bf588e 100644 --- a/src/gallium/drivers/zink/zink_program_state.hpp +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -221,6 +221,8 @@ zink_get_gfx_pipeline(struct zink_context *ctx, if (!pc_entry) return VK_NULL_HANDLE; memcpy(&pc_entry->state, state, sizeof(*state)); + pc_entry->prog = prog; + 
util_queue_fence_init(&pc_entry->fence); entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, pc_entry); if (HAVE_LIB && /* TODO: if there's ever a dynamic render extension with input attachments */ @@ -247,13 +249,16 @@ zink_get_gfx_pipeline(struct zink_context *ctx, pc_entry->okey = okey; pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey->pipeline, gkey->pipeline, okey->pipeline, true); } else { - pipeline = zink_create_gfx_pipeline(screen, prog, state, state->element_state->binding_map, vkmode, true); + /* optimize by default only when expecting precompiles in order to reduce stuttering */ + pipeline = zink_create_gfx_pipeline(screen, prog, state, state->element_state->binding_map, vkmode, !HAVE_LIB); } if (pipeline == VK_NULL_HANDLE) return VK_NULL_HANDLE; zink_screen_update_pipeline_cache(screen, &prog->base, false); pc_entry->pipeline = pipeline; + if (HAVE_LIB) + zink_gfx_program_compile_queue(ctx, pc_entry); } struct zink_gfx_pipeline_cache_entry *cache_entry = (struct zink_gfx_pipeline_cache_entry *)entry->data; diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index d593138..c52edaf 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -176,11 +176,18 @@ zink_set_max_shader_compiler_threads(struct pipe_screen *pscreen, unsigned max_t static bool zink_is_parallel_shader_compilation_finished(struct pipe_screen *screen, void *shader, enum pipe_shader_type shader_type) { - /* not supported yet */ - if (shader_type != MESA_SHADER_COMPUTE) - return true; - struct zink_program *pg = shader; - return !pg->can_precompile || util_queue_fence_is_signalled(&pg->cache_fence); + if (shader_type == MESA_SHADER_COMPUTE) { + struct zink_program *pg = shader; + return !pg->can_precompile || util_queue_fence_is_signalled(&pg->cache_fence); + } + + struct zink_shader *zs = shader; + bool finished = true; + 
set_foreach(zs->programs, entry) { + struct zink_gfx_program *prog = (void*)entry->key; + finished &= util_queue_fence_is_signalled(&prog->base.cache_fence); + } + return finished; } static VkDeviceSize diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index b2e25cf..01b1af1 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -830,9 +830,12 @@ struct zink_gfx_pipeline_cache_entry { struct zink_gfx_pipeline_state state; VkPipeline pipeline; /* GPL only */ + struct util_queue_fence fence; struct zink_gfx_input_key *ikey; struct zink_gfx_library_key *gkey; struct zink_gfx_output_key *okey; + struct zink_gfx_program *prog; + VkPipeline unoptimized_pipeline; }; struct zink_gfx_program { -- 2.7.4