From 8f72f137ad168775e6b50b69b1af2ba2754dbcfe Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 12 Jul 2019 19:49:30 -0400 Subject: [PATCH] radeonsi/gfx10: add as_ngg variant for TES as ES to select Wave32/64 Legacy GS has to use Wave64, so TES before GS has to use Wave64 too. Acked-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Samuel Pitoiset --- src/gallium/drivers/radeonsi/si_pipe.h | 7 ++++--- src/gallium/drivers/radeonsi/si_shader.c | 24 ++++++++++++++++-------- src/gallium/drivers/radeonsi/si_shader.h | 3 +++ src/gallium/drivers/radeonsi/si_state_shaders.c | 13 +++++++++---- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 47cfac4..63e1bda 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1895,13 +1895,14 @@ static inline bool si_compute_prim_discard_enabled(struct si_context *sctx) static inline unsigned si_get_wave_size(struct si_screen *sscreen, enum pipe_shader_type shader_type, - bool ngg) + bool ngg, bool es) { if (shader_type == PIPE_SHADER_COMPUTE) return sscreen->compute_wave_size; else if (shader_type == PIPE_SHADER_FRAGMENT) return sscreen->ps_wave_size; - else if (shader_type == PIPE_SHADER_GEOMETRY && !ngg) /* legacy GS only supports Wave64 */ + else if ((shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) || + (shader_type == PIPE_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */ return 64; else return sscreen->ge_wave_size; @@ -1910,7 +1911,7 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen, static inline unsigned si_get_shader_wave_size(struct si_shader *shader) { return si_get_wave_size(shader->selector->screen, shader->selector->type, - shader->key.as_ngg); + shader->key.as_ngg, shader->key.as_es); } #define PRINT_ERR(fmt, args...) \ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 4bdaa7f..6fd7a56 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5727,7 +5727,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, shader->is_gs_copy_shader = true; si_init_shader_ctx(&ctx, sscreen, compiler, - si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false)); + si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false)); ctx.shader = shader; ctx.type = PIPE_SHADER_VERTEX; @@ -6172,7 +6172,8 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) } } - if (shader->key.as_ngg && ctx->type != PIPE_SHADER_GEOMETRY) { + if (ctx->type != PIPE_SHADER_GEOMETRY && + (shader->key.as_ngg && !shader->key.as_es)) { /* Unconditionally declare scratch space base for streamout and * vertex compaction. Whether space is actually allocated is * determined during linking / PM4 creation. @@ -6219,13 +6220,13 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) ctx->param_merged_wave_info, 0); } else if (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY || - shader->key.as_ngg) { + (shader->key.as_ngg && !shader->key.as_es)) { LLVMValueRef num_threads; bool nested_barrier; if (!shader->is_monolithic || (ctx->type == PIPE_SHADER_TESS_EVAL && - shader->key.as_ngg)) + (shader->key.as_ngg && !shader->key.as_es))) ac_init_exec_full_mask(&ctx->ac); if (ctx->type == PIPE_SHADER_TESS_CTRL || @@ -7048,6 +7049,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, struct si_shader shader_es = {}; shader_es.selector = es; shader_es.key.as_es = 1; + shader_es.key.as_ngg = shader->key.as_ngg; shader_es.key.mono = shader->key.mono; shader_es.key.opt = shader->key.opt; shader_es.is_monolithic = true; @@ -7305,7 +7307,8 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_context ctx; si_init_shader_ctx(&ctx, sscreen, compiler, - si_get_wave_size(sscreen, type, shader.key.as_ngg)); + si_get_wave_size(sscreen, type, shader.key.as_ngg, + shader.key.as_es)); ctx.shader = &shader; ctx.type = type; @@ -7703,10 +7706,15 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct pipe_debug_callback *debug) { if (sscreen->info.chip_class >= GFX9) { - struct si_shader *es_main_part = - shader->key.part.gs.es->main_shader_part_es; + struct si_shader *es_main_part; + enum pipe_shader_type es_type = shader->key.part.gs.es->type; - if (shader->key.part.gs.es->type == PIPE_SHADER_VERTEX && + if (es_type == PIPE_SHADER_TESS_EVAL && shader->key.as_ngg) + es_main_part = shader->key.part.gs.es->main_shader_part_ngg_es; + else + es_main_part = shader->key.part.gs.es->main_shader_part_es; + + if (es_type == PIPE_SHADER_VERTEX && !si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part, &shader->key.part.gs.vs_prolog)) return false; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1b4f29b..803045d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -337,6 +337,7 @@ struct si_shader_selector { struct si_shader *main_shader_part_ls; /* as_ls is set in the key */ struct si_shader *main_shader_part_es; /* as_es is set in the key */ struct si_shader *main_shader_part_ngg; /* as_ngg is set in the key */ + struct si_shader *main_shader_part_ngg_es; /* for Wave32 TES before legacy GS */ struct si_shader *gs_copy_shader; @@ -789,6 +790,8 @@ si_get_main_shader_part(struct si_shader_selector *sel, { if (key->as_ls) return &sel->main_shader_part_ls; + if (key->as_es && key->as_ngg) + return &sel->main_shader_part_ngg_es; if (key->as_es) return &sel->main_shader_part_es; if (key->as_ngg) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 48b8b73..c5e9230 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1852,10 +1852,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key->mono.u.ff_tcs_inputs_to_copy = sctx->vs_shader.cso->outputs_written; break; case PIPE_SHADER_TESS_EVAL: + key->as_ngg = stages_key.u.ngg; + if (sctx->gs_shader.cso) key->as_es = 1; else { - key->as_ngg = stages_key.u.ngg; si_shader_selector_key_hw_vs(sctx, sel, key); if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) @@ -2269,6 +2270,10 @@ current_not_ready: else assert(0); + if (sel->type == PIPE_SHADER_GEOMETRY && + previous_stage_sel->type == PIPE_SHADER_TESS_EVAL) + shader1_key.as_ngg = key->as_ngg; + mtx_lock(&previous_stage_sel->mutex); ok = si_check_missing_main_part(sscreen, previous_stage_sel, @@ -2429,7 +2434,7 @@ static void si_init_shader_selector_async(void *job, int thread_index) if (sel->nir) { /* TODO: GS always sets wave size = default. Legacy GS will have * incorrect subgroup_size and ballot_bit_size. */ - si_lower_nir(sel, si_get_wave_size(sscreen, sel->type, true)); + si_lower_nir(sel, si_get_wave_size(sscreen, sel->type, true, false)); } /* Compile the main shader part for use with a prolog and/or epilog. @@ -2455,9 +2460,9 @@ static void si_init_shader_selector_async(void *job, int thread_index) sel->so.num_outputs != 0, &shader->key); if (sscreen->info.chip_class >= GFX10 && - (((sel->type == PIPE_SHADER_VERTEX || - sel->type == PIPE_SHADER_TESS_EVAL) && + ((sel->type == PIPE_SHADER_VERTEX && !shader->key.as_ls && !shader->key.as_es) || + sel->type == PIPE_SHADER_TESS_EVAL || sel->type == PIPE_SHADER_GEOMETRY)) shader->key.as_ngg = 1; -- 2.7.4