LLVM does something similar: https://reviews.llvm.org/D153295
fossil-db (gfx1100):
Totals from 21 (0.02% of 133461) affected shaders:
Instrs: 147428 -> 147396 (-0.02%)
CodeSize: 797188 -> 797060 (-0.02%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 2930317cea53 ("aco/gfx11: deallocate VGPRs at the end of the shader")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24669>
if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves))
return false;
+ /* sendmsg(dealloc_vgprs) releases scratch, so this isn't safe if there is an in-progress scratch
+ * store. */
+ if (uses_scratch(program))
+ return false;
+
Block& block = program->blocks.back();
/* don't bother checking if there is a pending VMEM store or export: there almost always is */
uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);
+bool uses_scratch(Program* program);
+
typedef struct {
const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
}
} /* end namespace */
+bool
+uses_scratch(Program* program)
+{
+ /* Reports whether the program uses scratch: true when the config has a non-zero
+ * scratch_bytes_per_wave or when the stage is raytracing_cs. RT uses scratch but we don't yet
+ * know how much, so the raytracing stage is always reported as using scratch. */
+ return program->config->scratch_bytes_per_wave || program->stage == raytracing_cs;
+}
+
uint16_t
get_extra_sgprs(Program* program)
{
- /* We don't use this register on GFX6-8 and it's removed on GFX10+. RT uses scratch but we don't
- * yet know how much.
- */
- bool needs_flat_scr =
- (program->config->scratch_bytes_per_wave || program->stage == raytracing_cs) &&
- program->gfx_level == GFX9;
+ /* We don't use this register on GFX6-8 and it's removed on GFX10+. */
+ bool needs_flat_scr = uses_scratch(program) && program->gfx_level == GFX9;
if (program->gfx_level >= GFX10) {
assert(!program->dev.xnack_enabled);