From 4d1e43badb94afaba548c61144a6468845b4dd33 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 25 Oct 2019 20:25:59 -0400 Subject: [PATCH] radeonsi: initialize shader compilers in threads on demand It takes a noticeable amount of time with piglit. Reviewed-by: Timothy Arceri --- src/gallium/drivers/radeonsi/si_compute.c | 3 +++ src/gallium/drivers/radeonsi/si_pipe.c | 10 ++-------- src/gallium/drivers/radeonsi/si_pipe.h | 3 +++ src/gallium/drivers/radeonsi/si_state_shaders.c | 6 ++++++ 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 19d4cca..6d9b5d0 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -121,6 +121,9 @@ static void si_create_compute_state_async(void *job, int thread_index) assert(thread_index < ARRAY_SIZE(sscreen->compiler)); compiler = &sscreen->compiler[thread_index]; + if (!compiler->passes) + si_init_compiler(sscreen, compiler); + if (program->ir_type == PIPE_SHADER_IR_TGSI) { tgsi_scan_shader(sel->tokens, &sel->info); } else { diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 1b7fdf2..be7424e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -127,8 +127,7 @@ static const struct debug_named_value debug_options[] = { DEBUG_NAMED_VALUE_END /* must be last */ }; -static void si_init_compiler(struct si_screen *sscreen, - struct ac_llvm_compiler *compiler) +void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler) { /* Only create the less-optimizing version of the compiler on APUs * predating Ryzen (Raven). 
*/ @@ -938,7 +937,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws, const struct pipe_screen_config *config) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); - unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i; + unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads; if (!sscreen) { return NULL; @@ -1224,11 +1223,6 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws, } } - for (i = 0; i < num_comp_hi_threads; i++) - si_init_compiler(sscreen, &sscreen->compiler[i]); - for (i = 0; i < num_comp_lo_threads; i++) - si_init_compiler(sscreen, &sscreen->compiler_lowp[i]); - sscreen->ge_wave_size = 64; sscreen->ps_wave_size = 64; sscreen->compute_wave_size = 64; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1a65c30..6218383 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1426,6 +1426,9 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx, unsigned input_indexbuf_max_elements); void si_initialize_prim_discard_tunables(struct si_context *sctx); +/* si_pipe.c */ +void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler); + /* si_perfcounters.c */ void si_init_perfcounters(struct si_screen *screen); void si_destroy_perfcounters(struct si_screen *screen); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index e5e7d52..a630795 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2105,6 +2105,9 @@ static void si_build_shader_variant(struct si_shader *shader, compiler = shader->compiler_ctx_state.compiler; } + if (!compiler->passes) + si_init_compiler(sscreen, compiler); + if (unlikely(!si_shader_create(sscreen, compiler, shader, debug))) { PRINT_ERR("Failed to build shader variant (type=%u)\n", sel->type); @@ -2472,6 +2475,9 @@ static void 
si_init_shader_selector_async(void *job, int thread_index) assert(thread_index < ARRAY_SIZE(sscreen->compiler)); compiler = &sscreen->compiler[thread_index]; + if (!compiler->passes) + si_init_compiler(sscreen, compiler); + /* Compile the main shader part for use with a prolog and/or epilog. * If this fails, the driver will try to compile a monolithic shader * on demand. -- 2.7.4