return false;
}
+/* Decides whether the program should release its VGPRs early and, if the
+ * last block ends with s_endpgm, inserts the request right before it.
+ *
+ * Returns false without touching the program when the GFX level is below
+ * GFX11, or when the program's VGPR demand is already within the value
+ * get_addr_vgpr_from_waves() reports for the maximum suitable wave count.
+ * Otherwise returns true.
+ *
+ * NOTE(review): true is returned even when the last block is empty or does
+ * not end with s_endpgm, i.e. when nothing was inserted here. Presumably the
+ * caller uses the result to emit the message at other program exits; confirm.
+ */
+bool
+dealloc_vgprs(Program* program)
+{
+ if (program->gfx_level < GFX11)
+ return false;
+
+ /* skip if deallocating VGPRs won't increase occupancy */
+ /* start from the wave64-per-SIMD limit scaled by 64 / wave_size, then let
+  * max_suitable_waves() adjust it for this program */
+ uint16_t max_waves = program->dev.max_wave64_per_simd * (64 / program->wave_size);
+ max_waves = max_suitable_waves(program, max_waves);
+ if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves))
+ return false;
+
+ Block& block = program->blocks.back();
+
+ /* don't bother checking if there is a pending VMEM store or export: there almost always is */
+ Builder bld(program);
+ if (!block.instructions.empty() && block.instructions.back()->opcode == aco_opcode::s_endpgm) {
+ /* point the builder at the final instruction (the s_endpgm) so that the
+  * s_sendmsg is inserted immediately before it */
+ bld.reset(&block.instructions, block.instructions.begin() + (block.instructions.size() - 1));
+ bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
+ }
+
+ return true;
+}
+
} // namespace aco
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars);
void insert_wait_states(Program* program);
+/* Returns false on pre-GFX11 or when deallocating VGPRs would not increase
+ * occupancy; otherwise returns true and, if the last block ends with
+ * s_endpgm, inserts an s_sendmsg(dealloc_vgprs) before it. */
+bool dealloc_vgprs(Program* program);
void insert_NOPs(Program* program);
void form_hard_clauses(Program* program);
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
{
Block* discard_block = NULL;
+ bool should_dealloc_vgprs = dealloc_vgprs(program);
+
for (int block_idx = program->blocks.size() - 1; block_idx >= 0; block_idx--) {
Block* block = &program->blocks[block_idx];
lower_context ctx;
block = &program->blocks[block_idx];
bld.reset(discard_block);
+ if (should_dealloc_vgprs)
+ bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), 0,
program->gfx_level >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL,
false, true, true);