From: Rhys Perry Date: Mon, 26 Jul 2021 16:55:48 +0000 (+0100) Subject: aco: calculate correct register demand for branch instructions X-Git-Tag: upstream/22.3.5~19527 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5a536eca9ca763f53bf3e4c8b75752b527f8fc01;p=platform%2Fupstream%2Fmesa.git aco: calculate correct register demand for branch instructions Since copies for the successor's linear phis are inserted before the branch, we should consider the definitions and operands of the successor's linear phis. Fixes a Detroit: Become Human spilling failure with GCM+GVN. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 24002e1..0e94118 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -82,9 +82,15 @@ get_demand_before(RegisterDemand demand, aco_ptr& instr, } namespace { +struct PhiInfo { + uint16_t logical_phi_sgpr_ops = 0; + uint16_t linear_phi_ops = 0; + uint16_t linear_phi_defs = 0; +}; + void -process_live_temps_per_block(Program* program, live& lives, Block* block, - unsigned& worklist, std::vector& phi_sgpr_ops) +process_live_temps_per_block(Program* program, live& lives, Block* block, unsigned& worklist, + std::vector& phi_info) { std::vector& register_demand = lives.register_demand[block->index]; RegisterDemand new_demand; @@ -96,7 +102,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, /* initialize register demand */ for (unsigned t : live) new_demand += Temp(t, program->temp_rc[t]); - new_demand.sgpr -= phi_sgpr_ops[block->index]; + new_demand.sgpr -= phi_info[block->index].logical_phi_sgpr_ops; /* traverse the instructions backwards */ int idx; @@ -129,7 +135,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, /* GEN */ if (insn->opcode == aco_opcode::p_logical_end) { - new_demand.sgpr += phi_sgpr_ops[block->index]; + new_demand.sgpr += phi_info[block->index].logical_phi_sgpr_ops; } else { /* we need to do this in a separate loop because the next one can * setKill() for several operands at once and we don't want to @@ -170,6 +176,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, block->register_demand = block_register_demand; /* handle phi definitions */ + uint16_t linear_phi_defs = 0; int phi_idx = idx; while (phi_idx >= 0) { register_demand[phi_idx] = new_demand; @@ -192,9 +199,17 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, else definition.setKill(true); + if (insn->opcode == aco_opcode::p_linear_phi) { + assert(definition.getTemp().type() == RegType::sgpr); + linear_phi_defs += definition.size(); + } + phi_idx--; } + for (unsigned pred_idx : block->linear_preds) + phi_info[pred_idx].linear_phi_defs = linear_phi_defs; + /* now, we need to merge the live-ins into the live-out sets */ for (unsigned t : live) { RegClass rc = program->temp_rc[t]; @@ -231,8 +246,12 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, const bool inserted = lives.live_out[preds[i]].insert(operand.tempId()).second; if (inserted) { worklist = std::max(worklist, preds[i] + 1); - if (insn->opcode == aco_opcode::p_phi && operand.getTemp().type() == RegType::sgpr) - phi_sgpr_ops[preds[i]] += operand.size(); + if (insn->opcode == aco_opcode::p_phi && operand.getTemp().type() == RegType::sgpr) { + phi_info[preds[i]].logical_phi_sgpr_ops += operand.size(); + } else if (insn->opcode == aco_opcode::p_linear_phi) { + assert(operand.getTemp().type() == RegType::sgpr); + phi_info[preds[i]].linear_phi_ops += operand.size(); + } } /* set if the operand is killed by this (or another) phi instruction */ @@ -386,7 +405,7 @@ live_var_analysis(Program* program) result.live_out.resize(program->blocks.size()); result.register_demand.resize(program->blocks.size()); unsigned worklist = program->blocks.size(); - std::vector phi_sgpr_ops(program->blocks.size()); + std::vector phi_info(program->blocks.size()); RegisterDemand new_demand; program->needs_vcc = false; @@ -396,10 +415,16 @@ live_var_analysis(Program* program) while (worklist) { unsigned block_idx = --worklist; process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist, - phi_sgpr_ops); + phi_info); new_demand.update(program->blocks[block_idx].register_demand); } + /* Handle branches: we will insert copies created for linear phis just before the branch. */ + for (Block& block : program->blocks) { + result.register_demand[block.index].back().sgpr += phi_info[block.index].linear_phi_defs; + result.register_demand[block.index].back().sgpr -= phi_info[block.index].linear_phi_ops; + } + /* calculate the program's register demand and number of waves */ if (program->progress < CompilationProgress::after_ra) update_vgpr_sgpr_demand(program, new_demand);