From a368385b23e4daf41280be12d2eb269a38ec04d0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 16 Nov 2021 19:45:20 -0500 Subject: [PATCH] radeonsi: add is_gs parameter into si_vs_needs_prolog and disable the VS prolog code for GS. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader.c | 11 +++++++---- src/gallium/drivers/radeonsi/si_shader_internal.h | 2 +- src/gallium/drivers/radeonsi/si_shader_llvm.c | 11 ++++++----- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c9df3cc..4c53477 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1289,7 +1289,8 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) bool si_vs_needs_prolog(const struct si_shader_selector *sel, const struct si_vs_prolog_bits *prolog_key, - const union si_shader_key *key, bool ngg_cull_shader) + const union si_shader_key *key, bool ngg_cull_shader, + bool is_gs) { assert(sel->info.stage == MESA_SHADER_VERTEX); @@ -1297,7 +1298,7 @@ bool si_vs_needs_prolog(const struct si_shader_selector *sel, * VS prolog. */ return sel->vs_needs_prolog || prolog_key->ls_vgpr_fix || /* The 2nd VS prolog loads input VGPRs from LDS */ - (key->ge.opt.ngg_culling && !ngg_cull_shader); + (key->ge.opt.ngg_culling && !ngg_cull_shader && !is_gs); } /** @@ -1323,7 +1324,8 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_ key->vs_prolog.as_es = shader_out->key.ge.as_es; key->vs_prolog.as_ngg = shader_out->key.ge.as_ngg; - if (!ngg_cull_shader && shader_out->key.ge.opt.ngg_culling) + if (shader_out->selector->info.stage != MESA_SHADER_GEOMETRY && + !ngg_cull_shader && shader_out->key.ge.opt.ngg_culling) key->vs_prolog.load_vgprs_after_culling = 1; if (shader_out->selector->info.stage == MESA_SHADER_TESS_CTRL) { @@ -1652,7 +1654,8 @@ static bool si_get_vs_prolog(struct si_screen *sscreen, struct ac_llvm_compiler { struct si_shader_selector *vs = main_part->selector; - if (!si_vs_needs_prolog(vs, key, &shader->key, false)) + if (!si_vs_needs_prolog(vs, key, &shader->key, false, + shader->selector->info.stage == MESA_SHADER_GEOMETRY)) return true; /* Get the prolog. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 2acdd83..4a7d216 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -171,7 +171,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader); unsigned si_get_max_workgroup_size(const struct si_shader *shader); bool si_vs_needs_prolog(const struct si_shader_selector *sel, const struct si_vs_prolog_bits *prolog_key, - const union si_shader_key *key, bool ngg_cull_shader); + const union si_shader_key *key, bool ngg_cull_shader, bool is_gs); void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_sgprs, bool ngg_cull_shader, const struct si_vs_prolog_bits *prolog_key, struct si_shader *shader_out, union si_shader_part_key *key); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index dd944e7..063b24d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -955,7 +955,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad if ((!shader->is_monolithic || no_wrapper_func) && (ctx->stage == MESA_SHADER_TESS_EVAL || (ctx->stage == MESA_SHADER_VERTEX && - !si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, ngg_cull_shader)))) + !si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, ngg_cull_shader, + false)))) ac_init_exec_full_mask(&ctx->ac); /* NGG VS and NGG TES: Send gs_alloc_req and the prim export at the beginning to decrease @@ -1113,7 +1114,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * LLVMValueRef main_fn = ctx.main_fn; if (ngg_cull_main_fn) { - if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true)) { + if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true, false)) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true, &shader->key.ge.part.vs.prolog, shader, &prolog_key); @@ -1125,7 +1126,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * parts[num_parts++] = ngg_cull_main_fn; } - if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, false)) { + if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, false, false)) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, false, &shader->key.ge.part.vs.prolog, shader, &prolog_key); @@ -1162,7 +1163,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * struct si_shader_selector *ls = shader->key.ge.part.tcs.ls; LLVMValueRef parts[4]; bool vs_needs_prolog = - si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false); + si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false, false); /* TCS main part */ parts[2] = ctx.main_fn; @@ -1254,7 +1255,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* ES prolog */ if (es->info.stage == MESA_SHADER_VERTEX && - si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false)) { + si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false, true)) { union si_shader_part_key vs_prolog_key; si_get_vs_prolog_key(&es->info, shader_es.info.num_input_sgprs, false, &shader->key.ge.part.gs.vs_prolog, shader, &vs_prolog_key); -- 2.7.4