From a8dd07518c59af0087ed311cee232c31c3e8268c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 8 Sep 2022 12:55:31 +0200 Subject: [PATCH] aco/optimizer_postRA: Fix logical control flow handling. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Change reset_block() so it only considers the logical predecessors for VGPRs. Relevant for some optimizations across loops. This commit fixes an assertion failure which was triggered by Zink in a piglit test. Fossil DB stats unaffected on Navi 21. Fixes: 2e56e2342094e8ec90afa5265b1c43503f662939 Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer_postRA.cpp | 46 ++++++++++++++++++++---- src/amd/compiler/tests/test_optimizer_postRA.cpp | 4 ++- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index d91d23e..88934274 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -34,6 +34,9 @@ namespace aco { namespace { constexpr const size_t max_reg_cnt = 512; +constexpr const size_t max_sgpr_cnt = 128; +constexpr const size_t min_vgpr = 256; +constexpr const size_t max_vgpr_cnt = 256; struct Idx { bool operator==(const Idx& other) const { return block == other.block && instr == other.instr; } @@ -66,18 +69,47 @@ struct pr_opt_ctx { std::fill(instr_idx_by_regs[block->index].begin(), instr_idx_by_regs[block->index].end(), not_written_in_block); } else { - unsigned first_pred = block->linear_preds[0]; - for (unsigned i = 0; i < max_reg_cnt; i++) { - bool all_same = std::all_of( - std::next(block->linear_preds.begin()), block->linear_preds.end(), - [&](unsigned pred) - { return instr_idx_by_regs[pred][i] == instr_idx_by_regs[first_pred][i]; }); + const uint32_t first_linear_pred = block->linear_preds[0]; + const std::vector& linear_preds = block->linear_preds; + + for (unsigned i = 0; i < max_sgpr_cnt; i++) { + const bool all_same = std::all_of( + std::next(linear_preds.begin()), linear_preds.end(), + [=](unsigned pred) + { return instr_idx_by_regs[pred][i] == instr_idx_by_regs[first_linear_pred][i]; }); if (all_same) - instr_idx_by_regs[block->index][i] = instr_idx_by_regs[first_pred][i]; + instr_idx_by_regs[block->index][i] = instr_idx_by_regs[first_linear_pred][i]; else instr_idx_by_regs[block->index][i] = written_by_multiple_instrs; } + + if (!block->logical_preds.empty()) { + /* We assume that VGPRs are only read by blocks which have a logical predecessor, + * ie. any block that reads any VGPR has at least 1 logical predecessor. + */ + const unsigned first_logical_pred = block->logical_preds[0]; + const std::vector& logical_preds = block->logical_preds; + + for (unsigned i = min_vgpr; i < (min_vgpr + max_vgpr_cnt); i++) { + const bool all_same = std::all_of( + std::next(logical_preds.begin()), logical_preds.end(), + [=](unsigned pred) { + return instr_idx_by_regs[pred][i] == instr_idx_by_regs[first_logical_pred][i]; + }); + + if (all_same) + instr_idx_by_regs[block->index][i] = instr_idx_by_regs[first_logical_pred][i]; + else + instr_idx_by_regs[block->index][i] = written_by_multiple_instrs; + } + } else { + /* If a block has no logical predecessors, it is not part of the + * logical CFG and therefore it also won't have any logical successors. + * Such a block does not write any VGPRs ever. + */ + assert(block->logical_succs.empty()); + } } } diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp index c018937..d3d3fb0 100644 --- a/src/amd/compiler/tests/test_optimizer_postRA.cpp +++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp @@ -425,14 +425,16 @@ BEGIN_TEST(optimizer_postRA.dpp) /* control flow */ //! BB1 - //! /* logical preds: / linear preds: BB0, / kind: uniform, */ + //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */ //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 //! p_unit_test 10, %res10:v[2] Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); bld.reset(program->create_and_insert_block()); program->blocks[0].linear_succs.push_back(1); + program->blocks[0].logical_succs.push_back(1); program->blocks[1].linear_preds.push_back(0); + program->blocks[1].logical_preds.push_back(0); Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b); writeout(10, Operand(res10, reg_v2)); -- 2.7.4