From 5a536eca9ca763f53bf3e4c8b75752b527f8fc01 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 26 Jul 2021 17:55:48 +0100 Subject: [PATCH] aco: calculate correct register demand for branch instructions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Since copies for the successor's linear phis are inserted before the branch, we should consider the definitions and operands of the successor's linear phis. Fixes a Detroit: Become Human spilling failure with GCM+GVN. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_live_var_analysis.cpp | 41 ++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 24002e1..0e94118 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -82,9 +82,15 @@ get_demand_before(RegisterDemand demand, aco_ptr& instr, } namespace { +struct PhiInfo { + uint16_t logical_phi_sgpr_ops = 0; + uint16_t linear_phi_ops = 0; + uint16_t linear_phi_defs = 0; +}; + void -process_live_temps_per_block(Program* program, live& lives, Block* block, - unsigned& worklist, std::vector& phi_sgpr_ops) +process_live_temps_per_block(Program* program, live& lives, Block* block, unsigned& worklist, + std::vector& phi_info) { std::vector& register_demand = lives.register_demand[block->index]; RegisterDemand new_demand; @@ -96,7 +102,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, /* initialize register demand */ for (unsigned t : live) new_demand += Temp(t, program->temp_rc[t]); - new_demand.sgpr -= phi_sgpr_ops[block->index]; + new_demand.sgpr -= phi_info[block->index].logical_phi_sgpr_ops; /* traverse the instructions backwards */ int idx; @@ -129,7 +135,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, /* GEN */ if (insn->opcode == aco_opcode::p_logical_end) { - new_demand.sgpr += phi_sgpr_ops[block->index]; + new_demand.sgpr += phi_info[block->index].logical_phi_sgpr_ops; } else { /* we need to do this in a separate loop because the next one can * setKill() for several operands at once and we don't want to @@ -170,6 +176,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, block->register_demand = block_register_demand; /* handle phi definitions */ + uint16_t linear_phi_defs = 0; int phi_idx = idx; while (phi_idx >= 0) { register_demand[phi_idx] = new_demand; @@ -192,9 +199,17 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, else definition.setKill(true); + if (insn->opcode == aco_opcode::p_linear_phi) { + assert(definition.getTemp().type() == RegType::sgpr); + linear_phi_defs += definition.size(); + } + phi_idx--; } + for (unsigned pred_idx : block->linear_preds) + phi_info[pred_idx].linear_phi_defs = linear_phi_defs; + /* now, we need to merge the live-ins into the live-out sets */ for (unsigned t : live) { RegClass rc = program->temp_rc[t]; @@ -231,8 +246,12 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, const bool inserted = lives.live_out[preds[i]].insert(operand.tempId()).second; if (inserted) { worklist = std::max(worklist, preds[i] + 1); - if (insn->opcode == aco_opcode::p_phi && operand.getTemp().type() == RegType::sgpr) - phi_sgpr_ops[preds[i]] += operand.size(); + if (insn->opcode == aco_opcode::p_phi && operand.getTemp().type() == RegType::sgpr) { + phi_info[preds[i]].logical_phi_sgpr_ops += operand.size(); + } else if (insn->opcode == aco_opcode::p_linear_phi) { + assert(operand.getTemp().type() == RegType::sgpr); + phi_info[preds[i]].linear_phi_ops += operand.size(); + } } /* set if the operand is killed by this (or another) phi instruction */ @@ -386,7 +405,7 @@ live_var_analysis(Program* program) result.live_out.resize(program->blocks.size()); result.register_demand.resize(program->blocks.size()); unsigned worklist = program->blocks.size(); - std::vector phi_sgpr_ops(program->blocks.size()); + std::vector phi_info(program->blocks.size()); RegisterDemand new_demand; program->needs_vcc = false; @@ -396,10 +415,16 @@ live_var_analysis(Program* program) while (worklist) { unsigned block_idx = --worklist; process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist, - phi_sgpr_ops); + phi_info); new_demand.update(program->blocks[block_idx].register_demand); } + /* Handle branches: we will insert copies created for linear phis just before the branch. */ + for (Block& block : program->blocks) { + result.register_demand[block.index].back().sgpr += phi_info[block.index].linear_phi_defs; + result.register_demand[block.index].back().sgpr -= phi_info[block.index].linear_phi_ops; + } + /* calculate the program's register demand and number of waves */ if (program->progress < CompilationProgress::after_ra) update_vgpr_sgpr_demand(program, new_demand); -- 2.7.4