From f76a6cb337f979fb26d3233d7e0ff208ec8885bb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Sun, 22 Oct 2017 17:38:43 +0200 Subject: [PATCH] radeonsi: always use async compiles when creating shader/compute states MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit With Gallium threaded contexts, creating shader/compute states is effectively a screen operation, so we should not use context state. In particular, this allows us to avoid using the context's LLVM TargetMachine. This isn't an issue yet because u_threaded_context filters out non-async debug callbacks, and we disable threaded contexts for debug contexts. However, we may want to change that in the future. Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_compute.c | 42 +++++++++++++++---------- src/gallium/drivers/radeonsi/si_state_shaders.c | 42 +++++++++++++++---------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e55988a..3eee907 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -23,6 +23,7 @@ */ #include "tgsi/tgsi_parse.h" +#include "util/u_async_debug.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" @@ -84,14 +85,10 @@ static void si_create_compute_state_async(void *job, int thread_index) LLVMTargetMachineRef tm; struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug; - if (thread_index >= 0) { - assert(thread_index < ARRAY_SIZE(program->screen->tm)); - tm = program->screen->tm[thread_index]; - if (!debug->async) - debug = NULL; - } else { - tm = program->compiler_ctx_state.tm; - } + assert(!debug->debug_message || debug->async); + assert(thread_index >= 0); + assert(thread_index < ARRAY_SIZE(program->screen->tm)); + tm = program->screen->tm[thread_index]; memset(&sel, 0, sizeof(sel)); @@ -167,20 +164,31 @@ static void *si_create_compute_state( return NULL; } - program->compiler_ctx_state.tm = sctx->tm; program->compiler_ctx_state.debug = sctx->debug; program->compiler_ctx_state.is_debug_context = sctx->is_debug; p_atomic_inc(&sscreen->b.num_shaders_created); util_queue_fence_init(&program->ready); - if ((sctx->debug.debug_message && !sctx->debug.async) || - sctx->is_debug || - si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE)) - si_create_compute_state_async(program, -1); - else - util_queue_add_job(&sscreen->shader_compiler_queue, - program, &program->ready, - si_create_compute_state_async, NULL); + struct util_async_debug_callback async_debug; + bool wait = + (sctx->debug.debug_message && !sctx->debug.async) || + sctx->is_debug || + si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE); + + if (wait) { + u_async_debug_init(&async_debug); + program->compiler_ctx_state.debug = async_debug.base; + } + + util_queue_add_job(&sscreen->shader_compiler_queue, + program, &program->ready, + si_create_compute_state_async, NULL); + + if (wait) { + util_queue_fence_wait(&program->ready); + u_async_debug_drain(&async_debug, &sctx->debug); + u_async_debug_cleanup(&async_debug); + } } else { const struct pipe_llvm_program_header *header; const char *code; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 757e889..3edc340 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -30,6 +30,7 @@ #include "tgsi/tgsi_ureg.h" #include "util/hash_table.h" #include "util/crc32.h" +#include "util/u_async_debug.h" #include "util/u_memory.h" #include "util/u_prim.h" @@ -1840,14 +1841,10 @@ static void si_init_shader_selector_async(void *job, int thread_index) struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug; unsigned i; - if (thread_index >= 0) { - assert(thread_index < ARRAY_SIZE(sscreen->tm)); - tm = sscreen->tm[thread_index]; - if (!debug->async) - debug = NULL; - } else { - tm = sel->compiler_ctx_state.tm; - } + assert(!debug->debug_message || debug->async); + assert(thread_index >= 0); + assert(thread_index < ARRAY_SIZE(sscreen->tm)); + tm = sscreen->tm[thread_index]; /* Compile the main shader part for use with a prolog and/or epilog. * If this fails, the driver will try to compile a monolithic shader @@ -2042,7 +2039,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx, pipe_reference_init(&sel->reference, 1); sel->screen = sscreen; - sel->compiler_ctx_state.tm = sctx->tm; sel->compiler_ctx_state.debug = sctx->debug; sel->compiler_ctx_state.is_debug_context = sctx->is_debug; @@ -2272,14 +2268,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx, (void) mtx_init(&sel->mutex, mtx_plain); util_queue_fence_init(&sel->ready); - if ((sctx->debug.debug_message && !sctx->debug.async) || - sctx->is_debug || - si_can_dump_shader(&sscreen->b, sel->info.processor)) - si_init_shader_selector_async(sel, -1); - else - util_queue_add_job(&sscreen->shader_compiler_queue, sel, - &sel->ready, si_init_shader_selector_async, - NULL); + struct util_async_debug_callback async_debug; + bool wait = + (sctx->debug.debug_message && !sctx->debug.async) || + sctx->is_debug || + si_can_dump_shader(&sscreen->b, sel->info.processor); + + if (wait) { + u_async_debug_init(&async_debug); + sel->compiler_ctx_state.debug = async_debug.base; + } + + util_queue_add_job(&sscreen->shader_compiler_queue, sel, + &sel->ready, si_init_shader_selector_async, + NULL); + + if (wait) { + util_queue_fence_wait(&sel->ready); + u_async_debug_drain(&async_debug, &sctx->debug); + u_async_debug_cleanup(&async_debug); + } return sel; } -- 2.7.4