From f3e004cb56af3c770e1ad1ee00a39f958314560f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 8 Apr 2021 17:40:28 +0200 Subject: [PATCH] aco: Add a simple heuristic to decide early or late primitive export. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Late export is theoretically better if used with LATE_ALLOC, but in practice, the early export has an advantage of lower register usage, therefore more concurrent waves. The idea of this commit is that "small" shaders benefit from early primitive export more, due to being able to launch much more waves. Let's consider a NIR shader "small" when it has only 1 block. This yields both better performance, and better stats, than always using late export. Fossil DB on Sienna: Totals from 12807 (8.76% of 146265) affected shaders: VGPRs: 609128 -> 620216 (+1.82%); split: -0.01%, +1.83% SpillSGPRs: 1458 -> 1538 (+5.49%) CodeSize: 37028204 -> 37019320 (-0.02%); split: -0.17%, +0.14% MaxWaves: 282902 -> 278516 (-1.55%) Instrs: 7163142 -> 7162925 (-0.00%); split: -0.18%, +0.18% VClause: 169285 -> 169547 (+0.15%); split: -1.15%, +1.30% SClause: 267373 -> 267151 (-0.08%); split: -0.24%, +0.16% Copies: 446442 -> 444567 (-0.42%); split: -2.68%, +2.26% Branches: 156245 -> 156195 (-0.03%); split: -0.30%, +0.26% PreSGPRs: 434701 -> 447396 (+2.92%) PreVGPRs: 527783 -> 540527 (+2.41%) Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection_setup.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 29b3845..fb3e265 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -392,7 +392,7 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir) assert(!ctx->args->shader_info->so.num_outputs); /* TODO: check if the shader writes edge flags (not in Vulkan) */ - ctx->ngg_nogs_early_prim_export = true; + ctx->ngg_nogs_early_prim_export = exec_list_is_singular(&nir_shader_get_entrypoint(nir)->body); } if (ctx->stage == vertex_ngg && ctx->args->options->key.vs_common_out.export_prim_id) { @@ -464,8 +464,7 @@ setup_tes_variables(isel_context *ctx, nir_shader *nir) if (ctx->stage.hw == HWStage::NGG) assert(!ctx->args->shader_info->so.num_outputs); - /* Tess eval shaders can't write edge flags, so this can be always true. */ - ctx->ngg_nogs_early_prim_export = true; + ctx->ngg_nogs_early_prim_export = exec_list_is_singular(&nir_shader_get_entrypoint(nir)->body); } } -- 2.7.4