From 6aafa2bb4903db20269b3062dfa73b3fe781facd Mon Sep 17 00:00:00 2001
From: Gert Wollny
Date: Fri, 21 Jul 2023 17:04:46 +0200
Subject: [PATCH] r600/sfn: Split ALU blocks in scheduler to fit into 128 slots

With that, the backend no longer needs to split these ALU CFs.

Signed-off-by: Gert Wollny
Part-of:
---
 src/gallium/drivers/r600/sfn/sfn_scheduler.cpp | 92 ++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
index feb2ccc..3767bfd 100644
--- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
@@ -187,6 +187,8 @@ private:
    bool schedule_exports(Shader::ShaderBlocks& out_blocks,
                          std::list<ExportInstr *>& ready_list);
 
+   int maybe_split_alu_block(Shader::ShaderBlocks& out_blocks);
+
    template <typename I> bool schedule(std::list<I *>& ready_list);
 
    template <typename I> bool schedule_block(std::list<I *>& ready_list);
@@ -513,11 +515,17 @@ BlockScheduler::schedule_block(Block& in_block,
 
    if (cir.m_cf_instr) {
       // Assert that if condition is ready
+      if (m_current_block->type() != Block::alu) {
+         start_new_block(out_blocks, Block::alu);
+      }
       m_current_block->push_back(cir.m_cf_instr);
       cir.m_cf_instr->set_scheduled();
    }
 
-   out_blocks.push_back(m_current_block);
+   if (m_current_block->type() == Block::alu)
+      maybe_split_alu_block(out_blocks);
+   else
+      out_blocks.push_back(m_current_block);
 }
@@ -677,6 +685,12 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
 
       m_idx1_pending |= m_idx1_loading;
       m_idx1_loading = false;
 
+      if (!m_current_block->lds_group_active() &&
+          m_current_block->expected_ar_uses() == 0 &&
+          (!addr || is_index)) {
+         group->set_instr_flag(Instr::no_lds_or_addr_group);
+      }
+
       if (group->has_lds_group_start())
          m_current_block->lds_group_start(*group->begin());
@@ -704,7 +718,7 @@ BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
 
       auto ii = tex_ready.begin();
       sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
-      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
+      if ((unsigned)m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
          start_new_block(out_blocks, Block::tex);
 
       for (auto prep : (*ii)->prepare_instr()) {
@@ -749,9 +763,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
    if (!m_current_block->empty()) {
      sfn_log << SfnLog::schedule << "Start new block\n";
      assert(!m_current_block->lds_group_active());
-      out_blocks.push_back(m_current_block);
-      m_current_block =
-         new Block(m_current_block->nesting_depth(), m_current_block->id());
+
+      int next_id = m_current_block->id() + 1;
+      if (m_current_block->type() != Block::alu)
+         out_blocks.push_back(m_current_block);
+      else
+         next_id = maybe_split_alu_block(out_blocks);
+      m_current_block = new Block(m_current_block->nesting_depth(), next_id);
       m_current_block->set_instr_flag(Instr::force_cf);
 
       m_idx0_pending = m_idx1_pending = false;
@@ -759,6 +777,70 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
    m_current_block->set_type(type, m_chip_class);
 }
 
+int BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
+{
+   // TODO: needs fixing
+   if (m_current_block->remaining_slots() > 0) {
+      out_blocks.push_back(m_current_block);
+      return m_current_block->id() + 1;
+   }
+
+   int used_slots = 0;
+   int pending_slots = 0;
+
+   Instr *next_block_start = nullptr;
+   for (auto cur_group : *m_current_block) {
+      /* This limit is a bit fishy, it should be 128 */
+      if (used_slots + pending_slots + cur_group->slots() < 128) {
+         if (cur_group->can_start_alu_block()) {
+            next_block_start = cur_group;
+            used_slots += pending_slots;
+            pending_slots = cur_group->slots();
+         } else {
+            pending_slots += cur_group->slots();
+         }
+      } else {
+         assert(next_block_start);
+         next_block_start->set_instr_flag(Instr::force_cf);
+         used_slots = pending_slots;
+         pending_slots = cur_group->slots();
+      }
+   }
+
+   int sub_block_id = m_current_block->id();
+   Block *sub_block = new Block(m_current_block->nesting_depth(),
+                                sub_block_id++);
+   sub_block->set_type(Block::alu, m_chip_class);
+   sub_block->set_instr_flag(Instr::force_cf);
+
+   for (auto instr : *m_current_block) {
+      auto group = instr->as_alu_group();
+      if (!group) {
+         sub_block->push_back(instr);
+         continue;
+      }
+
+      if (group->group_force_alu_cf()) {
+         assert(!sub_block->lds_group_active());
+         out_blocks.push_back(sub_block);
+         sub_block = new Block(m_current_block->nesting_depth(),
+                               sub_block_id++);
+         sub_block->set_type(Block::alu, m_chip_class);
+         sub_block->set_instr_flag(Instr::force_cf);
+      }
+      sub_block->push_back(group);
+      if (group->has_lds_group_start())
+         sub_block->lds_group_start(*group->begin());
+
+      if (group->has_lds_group_end())
+         sub_block->lds_group_end();
+
+   }
+   if (!sub_block->empty())
+      out_blocks.push_back(sub_block);
+   return sub_block_id;
+}
+
 template <typename I>
 bool
 BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
-- 
2.7.4
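
For readers who want to see the splitting strategy in isolation, here is a minimal, self-contained C++ sketch of the two-pass approach that maybe_split_alu_block() implements: pass one walks the ALU groups, remembers the most recent group that may legally open a new block, and marks it with a force_cf flag as soon as the accumulated slot count would exceed the CF limit; pass two then emits a fresh sub-block at every marked group. All names below (FakeGroup, mark_split_points, split_blocks) are invented stand-ins for illustration only, not the real r600/sfn classes, and the "safe start" flag stands in for whatever can_start_alu_block() checks in the real code, presumably the no_lds_or_addr_group flag the patch sets in schedule_alu().

// Standalone sketch of the two-pass split; build: c++ -std=c++14 sketch.cpp
#include <cassert>
#include <iostream>
#include <vector>

struct FakeGroup {
   int slots;              // ALU slots this group occupies
   bool can_start_block;   // safe split point (no open LDS group, no live ADDR)
   bool force_cf = false;  // set by pass 1: a new CF block must start here
};

// Pass 1: mark the last safe group once the slot budget would overflow.
static void mark_split_points(std::vector<FakeGroup>& groups, int max_slots)
{
   int used_slots = 0;     // slots committed to the current sub-block
   int pending_slots = 0;  // slots accumulated since the last safe start
   FakeGroup *next_block_start = nullptr;

   for (auto& g : groups) {
      if (used_slots + pending_slots + g.slots < max_slots) {
         if (g.can_start_block) {
            next_block_start = &g;
            used_slots += pending_slots;
            pending_slots = g.slots;
         } else {
            pending_slots += g.slots;
         }
      } else {
         // Budget exhausted: split at the remembered safe group; the
         // pending tail becomes the start of the next sub-block.
         assert(next_block_start);
         next_block_start->force_cf = true;
         used_slots = pending_slots;
         pending_slots = g.slots;
      }
   }
}

// Pass 2: emit sub-blocks, opening a new one at every marked group.
static std::vector<std::vector<FakeGroup>>
split_blocks(const std::vector<FakeGroup>& groups)
{
   std::vector<std::vector<FakeGroup>> blocks(1);
   for (const auto& g : groups) {
      if (g.force_cf && !blocks.back().empty())
         blocks.emplace_back();
      blocks.back().push_back(g);
   }
   return blocks;
}

int main()
{
   // 200 two-slot groups = 400 slots, so a 128-slot budget forces splits;
   // only every third group is a safe start point.
   std::vector<FakeGroup> groups;
   for (int i = 0; i < 200; ++i)
      groups.push_back({2, i % 3 == 0});

   mark_split_points(groups, 128);

   for (const auto& block : split_blocks(groups)) {
      int slots = 0;
      for (const auto& g : block)
         slots += g.slots;
      std::cout << block.size() << " groups, " << slots << " slots\n";
      assert(slots < 128);
   }
   return 0;
}

The point of the two counters is that pending_slots accumulates groups that must stay together (an open LDS group, or instructions that depend on a live ADDR register), so when the limit is hit the split happens at the last known-safe group and the whole pending tail migrates into the next sub-block; that is why each emitted block stays below the limit even though the split decision is made late.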