From 0f847b18bc91dced5725169e8c96bef6c077db90 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 26 Mar 2020 19:36:05 +0100 Subject: [PATCH] aco: Don't store LS VS outputs to LDS when TCS doesn't need them. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Totals: Code Size: 254764624 -> 254745104 (-0.01 %) bytes Totals from affected shaders: VGPRS: 12132 -> 12112 (-0.16 %) Code Size: 573364 -> 553844 (-3.40 %) bytes Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Tested-by: Marge Bot Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 9 +++++++-- src/amd/compiler/aco_instruction_selection_setup.cpp | 7 +++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 716853d..fa3d38e 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3366,8 +3366,13 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr) unsigned write_mask = nir_intrinsic_write_mask(instr); unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8u; - if (ctx->tcs_in_out_eq) - store_output_to_temps(ctx, instr); + if (ctx->tcs_in_out_eq && store_output_to_temps(ctx, instr)) { + /* When the TCS only reads this output directly and for the same vertices as its invocation id, it is unnecessary to store the VS output to LDS. */ + bool indirect_write; + bool temp_only_input = tcs_driver_location_matches_api_mask(ctx, instr, true, ctx->tcs_temp_only_inputs, &indirect_write); + if (temp_only_input && !indirect_write) + return; + } if (ctx->stage == vertex_es || ctx->stage == tess_eval_es) { /* GFX6-8: ES stage is not merged into GS, data is passed from ES to GS in VMEM. */ diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index bd90dca..644bc15 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -103,6 +103,7 @@ struct isel_context { /* tessellation information */ unsigned tcs_tess_lvl_out_loc; unsigned tcs_tess_lvl_in_loc; + uint64_t tcs_temp_only_inputs; uint32_t tcs_num_inputs; uint32_t tcs_num_patches; bool tcs_in_out_eq = false; @@ -908,6 +909,12 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir) ctx->tcs_num_inputs = ctx->args->options->key.tcs.num_inputs; } else if (ctx->stage == vertex_tess_control_hs) { ctx->tcs_num_inputs = util_last_bit64(ctx->args->shader_info->vs.ls_outputs_written); + + if (ctx->tcs_in_out_eq) { + ctx->tcs_temp_only_inputs = ~nir->info.tess.tcs_cross_invocation_inputs_read & + ~nir->info.inputs_read_indirectly & + nir->info.inputs_read; + } } else { unreachable("Unsupported TCS shader stage"); } -- 2.7.4