From: Rhys Perry Date: Tue, 10 Jan 2023 17:33:52 +0000 (+0000) Subject: aco/spill: always end spill vgpr after control flow X-Git-Tag: upstream/22.3.5~46 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b8ae58820e3150615ef6a16d89b1ff76626a1beb;p=platform%2Fupstream%2Fmesa.git aco/spill: always end spill vgpr after control flow To fix a hypothetical issue: v0 = start_linear_vgpr if (...) { } else { use_linear_vgpr(v0) } v0 = phi We need a p_end_linear_vgpr to ensure that the phi does not use the same VGPR as the linear VGPR. This is also much simpler. fossil-db (gfx1100): Totals from 1195 (0.89% of 134574) affected shaders: Instrs: 4123856 -> 4123826 (-0.00%); split: -0.00%, +0.00% CodeSize: 21461256 -> 21461100 (-0.00%); split: -0.00%, +0.00% Latency: 62816001 -> 62812999 (-0.00%); split: -0.00%, +0.00% InvThroughput: 9339049 -> 9338564 (-0.01%); split: -0.01%, +0.00% Copies: 304028 -> 304005 (-0.01%); split: -0.02%, +0.01% PreVGPRs: 115761 -> 115762 (+0.00%) Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Cc: mesa-stable Part-of: (cherry picked from commit bbc5247bf71cebfdb2ee79646bd2231a909a74eb) --- diff --git a/.pick_status.json b/.pick_status.json index ae537c8..830b6d7 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3730,7 +3730,7 @@ "description": "aco/spill: always end spill vgpr after control flow", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 38a5cf8..12cb333 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -1662,6 +1662,38 @@ assign_spill_slots_helper(spill_ctx& ctx, RegType type, std::vector& is_as } void +end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector& vgpr_spill_temps, + const std::vector& slots, + const std::unordered_map& spills) +{ + std::vector is_used(vgpr_spill_temps.size()); + for (std::pair pair : spills) { + if (pair.first.type() == RegType::sgpr && ctx.is_reloaded[pair.second]) + is_used[slots[pair.second] / ctx.wave_size] = true; + } + + std::vector temps; + for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { + if (vgpr_spill_temps[i].id() && !is_used[i]) { + temps.push_back(vgpr_spill_temps[i]); + vgpr_spill_temps[i] = Temp(); + } + } + if (temps.empty()) + return; + + aco_ptr destr{create_instruction( + aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)}; + for (unsigned i = 0; i < temps.size(); i++) + destr->operands[i] = Operand(temps[i]); + + std::vector>::iterator it = block.instructions.begin(); + while (is_phi(*it)) + ++it; + block.instructions.insert(it, std::move(destr)); +} + +void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { std::vector slots(ctx.interferences.size()); @@ -1709,54 +1741,12 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) /* replace pseudo instructions with actual hardware instructions */ unsigned last_top_level_block_idx = 0; - std::vector reload_in_loop(vgpr_spill_temps.size()); for (Block& block : ctx.program->blocks) { - /* after loops, we insert a user if there was a reload inside the loop */ - if (block.loop_nest_depth == 0) { - int end_vgprs = 0; - for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { - if (reload_in_loop[i]) - end_vgprs++; - } - - if (end_vgprs > 0) { - aco_ptr destr{create_instruction( - aco_opcode::p_end_linear_vgpr, Format::PSEUDO, end_vgprs, 0)}; - int k = 0; - for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { - if (reload_in_loop[i]) - destr->operands[k++] = Operand(vgpr_spill_temps[i]); - reload_in_loop[i] = false; - } - /* find insertion point */ - std::vector>::iterator it = block.instructions.begin(); - while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) - ++it; - block.instructions.insert(it, std::move(destr)); - } - } - if (block.kind & block_kind_top_level && !block.linear_preds.empty()) { last_top_level_block_idx = block.index; - /* check if any spilled variables use a created linear vgpr, otherwise destroy them */ - for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { - if (vgpr_spill_temps[i] == Temp()) - continue; - - bool can_destroy = true; - for (std::pair pair : ctx.spills_entry[block.index]) { - - if (ctx.interferences[pair.second].first.type() == RegType::sgpr && - slots[pair.second] / ctx.wave_size == i) { - can_destroy = false; - break; - } - } - if (can_destroy) - vgpr_spill_temps[i] = Temp(); - } + end_unused_spill_vgprs(ctx, block, vgpr_spill_temps, slots, ctx.spills_entry[block.index]); } std::vector>::iterator it; @@ -1818,7 +1808,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) reload_vgpr(ctx, block, instructions, *it, slots); } else { uint32_t spill_slot = slots[spill_id]; - reload_in_loop[spill_slot / ctx.wave_size] = block.loop_nest_depth > 0; /* check if the linear vgpr already exists */ if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) { @@ -1858,58 +1847,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) /* update required scratch memory */ ctx.program->config->scratch_bytes_per_wave += align(ctx.vgpr_spill_slots * 4 * ctx.program->wave_size, 1024); - - /* SSA elimination inserts copies for logical phis right before p_logical_end - * So if a linear vgpr is used between that p_logical_end and the branch, - * we need to ensure logical phis don't choose a definition which aliases - * the linear vgpr. - * TODO: Moving the spills and reloads to before p_logical_end might produce - * slightly better code. */ - for (Block& block : ctx.program->blocks) { - /* loops exits are already handled */ - if (block.logical_preds.size() <= 1) - continue; - - bool has_logical_phis = false; - for (aco_ptr& instr : block.instructions) { - if (instr->opcode == aco_opcode::p_phi) { - has_logical_phis = true; - break; - } else if (instr->opcode != aco_opcode::p_linear_phi) { - break; - } - } - if (!has_logical_phis) - continue; - - std::set vgprs; - for (unsigned pred_idx : block.logical_preds) { - Block& pred = ctx.program->blocks[pred_idx]; - for (int i = pred.instructions.size() - 1; i >= 0; i--) { - aco_ptr& pred_instr = pred.instructions[i]; - if (pred_instr->opcode == aco_opcode::p_logical_end) { - break; - } else if (pred_instr->opcode == aco_opcode::p_spill || - pred_instr->opcode == aco_opcode::p_reload) { - vgprs.insert(pred_instr->operands[0].getTemp()); - } - } - } - if (!vgprs.size()) - continue; - - aco_ptr destr{create_instruction( - aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vgprs.size(), 0)}; - int k = 0; - for (Temp tmp : vgprs) { - destr->operands[k++] = Operand(tmp); - } - /* find insertion point */ - std::vector>::iterator it = block.instructions.begin(); - while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) - ++it; - block.instructions.insert(it, std::move(destr)); - } } } /* end namespace */