From c9b177db0e51c96a297466ba2fc8a9b6a11e124f Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 11 Aug 2023 20:58:32 +0100 Subject: [PATCH] aco: don't create sendmsg(dealloc_vgprs) if scratch is used MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit LLVM does something similar: https://reviews.llvm.org/D153295 fossil-db (gfx1100): Totals from 21 (0.02% of 133461) affected shaders: Instrs: 147428 -> 147396 (-0.02%) CodeSize: 797188 -> 797060 (-0.02%) Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Fixes: 2930317cea53 ("aco/gfx11: deallocate VGPRs at the end of the shader") Part-of: --- src/amd/compiler/aco_ir.cpp | 5 +++++ src/amd/compiler/aco_ir.h | 2 ++ src/amd/compiler/aco_live_var_analysis.cpp | 15 +++++++++------ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index ecf34ed..9bc20b7 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1345,6 +1345,11 @@ dealloc_vgprs(Program* program) if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves)) return false; + /* sendmsg(dealloc_vgprs) releases scratch, so this isn't safe if there is an in-progress scratch + * store.
*/ + if (uses_scratch(program)) + return false; + Block& block = program->blocks.back(); /* don't bother checking if there is a pending VMEM store or export: there almost always is */ diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index b1f0280..716d019 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2334,6 +2334,8 @@ uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs); uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves); uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves); +bool uses_scratch(Program* program); + typedef struct { const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)]; const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)]; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 989e26b..ee1455f 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -325,15 +325,18 @@ calc_waves_per_workgroup(Program* program) } } /* end namespace */ +bool +uses_scratch(Program* program) +{ + /* RT uses scratch but we don't yet know how much. */ + return program->config->scratch_bytes_per_wave || program->stage == raytracing_cs; +} + uint16_t get_extra_sgprs(Program* program) { - /* We don't use this register on GFX6-8 and it's removed on GFX10+. RT uses scratch but we don't - * yet know how much. - */ - bool needs_flat_scr = - (program->config->scratch_bytes_per_wave || program->stage == raytracing_cs) && - program->gfx_level == GFX9; + /* We don't use this register on GFX6-8 and it's removed on GFX10+. */ + bool needs_flat_scr = uses_scratch(program) && program->gfx_level == GFX9; if (program->gfx_level >= GFX10) { assert(!program->dev.xnack_enabled); -- 2.7.4