bool schedule_exports(Shader::ShaderBlocks& out_blocks,
std::list<ExportInstr *>& ready_list);
+ int maybe_split_alu_block(Shader::ShaderBlocks& out_blocks);
+
template <typename I> bool schedule(std::list<I *>& ready_list);
template <typename I> bool schedule_block(std::list<I *>& ready_list);
if (cir.m_cf_instr) {
// Assert that the if condition is ready
+ if (m_current_block->type() != Block::alu) {
+ start_new_block(out_blocks, Block::alu);
+ }
m_current_block->push_back(cir.m_cf_instr);
cir.m_cf_instr->set_scheduled();
}
- out_blocks.push_back(m_current_block);
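+   /* An ALU block may exceed the clause slot limit, so let
+    * maybe_split_alu_block emit it, possibly as several blocks. */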
+ if (m_current_block->type() == Block::alu)
+ maybe_split_alu_block(out_blocks);
+ else
+ out_blocks.push_back(m_current_block);
}
void
m_idx1_pending |= m_idx1_loading;
m_idx1_loading = false;
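+   /* Flag groups that are outside an LDS access group, have no pending AR
+    * uses, and don't load the AR register; such groups can serve as
+    * potential split points when an oversized ALU block is broken up. */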
+ if (!m_current_block->lds_group_active() &&
+ m_current_block->expected_ar_uses() == 0 &&
+ (!addr || is_index)) {
+ group->set_instr_flag(Instr::no_lds_or_addr_group);
+ }
+
if (group->has_lds_group_start())
m_current_block->lds_group_start(*group->begin());
auto ii = tex_ready.begin();
sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
- if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
+ if ((unsigned)m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
start_new_block(out_blocks, Block::tex);
for (auto prep : (*ii)->prepare_instr()) {
if (!m_current_block->empty()) {
sfn_log << SfnLog::schedule << "Start new block\n";
assert(!m_current_block->lds_group_active());
- out_blocks.push_back(m_current_block);
- m_current_block =
- new Block(m_current_block->nesting_depth(), m_current_block->id());
+
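+      /* An ALU block may need to be split to respect the clause slot limit;
+       * maybe_split_alu_block emits it and returns the next free block id. */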
+ int next_id = m_current_block->id() + 1;
+ if (m_current_block->type() != Block::alu)
+ out_blocks.push_back(m_current_block);
+ else
+ next_id = maybe_split_alu_block(out_blocks);
+ m_current_block = new Block(m_current_block->nesting_depth(), next_id);
m_current_block->set_instr_flag(Instr::force_cf);
m_idx0_pending = m_idx1_pending = false;
m_current_block->set_type(type, m_chip_class);
}
+int BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
+{
+ // TODO: needs fixing
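+   /* The block still has free slots, so it fits into a single clause and
+    * can be emitted unchanged. */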
+ if (m_current_block->remaining_slots() > 0) {
+ out_blocks.push_back(m_current_block);
+ return m_current_block->id() + 1;
+ }
+
+ int used_slots = 0;
+ int pending_slots = 0;
+
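+   /* First pass: count slots and, when the limit would be exceeded, flag the
+    * last group that can start an ALU block as the beginning of a new
+    * sub-block. */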
+ Instr *next_block_start = nullptr;
+ for (auto cur_group : *m_current_block) {
+      /* This limit is a bit fishy: '<' caps a clause at 127 slots, but it
+       * should probably be 128 */
+ if (used_slots + pending_slots + cur_group->slots() < 128) {
+ if (cur_group->can_start_alu_block()) {
+ next_block_start = cur_group;
+ used_slots += pending_slots;
+ pending_slots = cur_group->slots();
+ } else {
+ pending_slots += cur_group->slots();
+ }
+ } else {
+ assert(next_block_start);
+ next_block_start->set_instr_flag(Instr::force_cf);
+ used_slots = pending_slots;
+ pending_slots = cur_group->slots();
+ }
+ }
+
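+   /* Second pass: re-emit the instructions into sub-blocks, starting a new
+    * one at each group flagged above, and keep the LDS group state per
+    * sub-block. */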
+ int sub_block_id = m_current_block->id();
+ Block *sub_block = new Block(m_current_block->nesting_depth(),
+ sub_block_id++);
+ sub_block->set_type(Block::alu, m_chip_class);
+ sub_block->set_instr_flag(Instr::force_cf);
+
+ for (auto instr : *m_current_block) {
+ auto group = instr->as_alu_group();
+ if (!group) {
+ sub_block->push_back(instr);
+ continue;
+ }
+
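+      /* A group flagged to force a CF instruction starts a new sub-block. */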
+ if (group->group_force_alu_cf()) {
+ assert(!sub_block->lds_group_active());
+ out_blocks.push_back(sub_block);
+ sub_block = new Block(m_current_block->nesting_depth(),
+ sub_block_id++);
+ sub_block->set_type(Block::alu, m_chip_class);
+ sub_block->set_instr_flag(Instr::force_cf);
+ }
+ sub_block->push_back(group);
+ if (group->has_lds_group_start())
+ sub_block->lds_group_start(*group->begin());
+
+ if (group->has_lds_group_end())
+ sub_block->lds_group_end();
+
+ }
+ if (!sub_block->empty())
+ out_blocks.push_back(sub_block);
+ return sub_block_id;
+}
+
template <typename I>
bool
BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)