r600/sfn: Split ALU blocks in scheduler to fit into 128 slots
author	Gert Wollny <gert.wollny@collabora.com>
Fri, 21 Jul 2023 15:04:46 +0000 (17:04 +0200)
committer	Marge Bot <emma+marge@anholt.net>
Wed, 26 Jul 2023 19:33:43 +0000 (19:33 +0000)
With this change the backend no longer needs to split these ALU CFs.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24297>
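
An ALU clause (CF instruction) on r600-class hardware can address at most 128 instruction slots, so an over-long ALU block must be split into several CFs. The new maybe_split_alu_block below does this in two passes: the first pass walks the scheduled ALU groups and, whenever the slot budget would overflow, tags the most recent group that may legally open a block with Instr::force_cf; the second pass emits a fresh sub-block at every tagged group. The following is a minimal standalone sketch of that strategy; AluGroup, slots, can_start_block, force_cf, mark_split_points and split_blocks are simplified stand-ins invented for illustration, not the driver's actual sfn classes.

#include <cassert>
#include <vector>

/* Simplified stand-in for the driver's AluGroup. */
struct AluGroup {
   int slots;             /* slots this group occupies in the clause */
   bool can_start_block;  /* false e.g. inside an LDS group or while an
                             address register value is live */
   bool force_cf = false; /* marker: a new ALU clause starts here */
};

/* Pass 1: mark split points so that no sub-block exceeds the slot budget.
 * Groups that must not open a block are accounted as "pending" slots so
 * they stay attached to the preceding legal split candidate. Like the
 * version in the commit (note its "TODO: needs fixing"), the candidate is
 * not refreshed in the overflow branch, and the bound is kept at < 128. */
void mark_split_points(std::vector<AluGroup>& groups, int max_slots = 128)
{
   int used_slots = 0;    /* slots committed to the current sub-block */
   int pending_slots = 0; /* slots seen since the last legal candidate */
   AluGroup *next_block_start = nullptr;

   for (auto& g : groups) {
      if (used_slots + pending_slots + g.slots < max_slots) {
         if (g.can_start_block) {
            next_block_start = &g;
            used_slots += pending_slots;
            pending_slots = g.slots;
         } else {
            pending_slots += g.slots;
         }
      } else {
         assert(next_block_start);
         next_block_start->force_cf = true;
         used_slots = pending_slots;
         pending_slots = g.slots;
      }
   }
}

/* Pass 2: emit sub-blocks, starting a new one at every marked group. */
std::vector<std::vector<AluGroup>>
split_blocks(const std::vector<AluGroup>& groups)
{
   std::vector<std::vector<AluGroup>> blocks(1);
   for (auto& g : groups) {
      if (g.force_cf && !blocks.back().empty())
         blocks.emplace_back();
      blocks.back().push_back(g);
   }
   return blocks;
}

Groups that must not open a block, for example the middle of an LDS read/write group or a group that consumes a live address register (cf. the no_lds_or_addr_group flag set in schedule_alu), are accumulated as pending slots so they stay attached to the preceding legal split point.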

src/gallium/drivers/r600/sfn/sfn_scheduler.cpp

index feb2ccc..3767bfd 100644 (file)
@@ -187,6 +187,8 @@ private:
    bool schedule_exports(Shader::ShaderBlocks& out_blocks,
                          std::list<ExportInstr *>& ready_list);
 
+   int maybe_split_alu_block(Shader::ShaderBlocks& out_blocks);
+
    template <typename I> bool schedule(std::list<I *>& ready_list);
 
    template <typename I> bool schedule_block(std::list<I *>& ready_list);
@@ -513,11 +515,17 @@ BlockScheduler::schedule_block(Block& in_block,
 
    if (cir.m_cf_instr) {
      // Assert that the if condition is ready
+      if (m_current_block->type() != Block::alu) {
+         start_new_block(out_blocks, Block::alu);
+      }
       m_current_block->push_back(cir.m_cf_instr);
       cir.m_cf_instr->set_scheduled();
    }
 
-   out_blocks.push_back(m_current_block);
+   if (m_current_block->type() == Block::alu)
+      maybe_split_alu_block(out_blocks);
+   else
+      out_blocks.push_back(m_current_block);
 }
 
 void
@@ -677,6 +685,12 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
    m_idx1_pending |= m_idx1_loading;
    m_idx1_loading = false;
 
+   if (!m_current_block->lds_group_active() &&
+       m_current_block->expected_ar_uses() == 0 &&
+       (!addr || is_index)) {
+      group->set_instr_flag(Instr::no_lds_or_addr_group);
+   }
+
    if (group->has_lds_group_start())
       m_current_block->lds_group_start(*group->begin());
 
@@ -704,7 +718,7 @@ BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
       auto ii = tex_ready.begin();
       sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
 
-      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
+      if ((unsigned)m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
          start_new_block(out_blocks, Block::tex);
 
       for (auto prep : (*ii)->prepare_instr()) {
@@ -749,9 +763,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
    if (!m_current_block->empty()) {
       sfn_log << SfnLog::schedule << "Start new block\n";
       assert(!m_current_block->lds_group_active());
-      out_blocks.push_back(m_current_block);
-      m_current_block =
-         new Block(m_current_block->nesting_depth(), m_current_block->id());
+
+      int next_id = m_current_block->id() + 1;
+      if (m_current_block->type() != Block::alu)
+         out_blocks.push_back(m_current_block);
+      else
+         next_id = maybe_split_alu_block(out_blocks);
+      m_current_block = new Block(m_current_block->nesting_depth(), next_id);
       m_current_block->set_instr_flag(Instr::force_cf);
       m_idx0_pending = m_idx1_pending = false;
 
@@ -759,6 +777,70 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
    m_current_block->set_type(type, m_chip_class);
 }
 
+int BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
+{
+   // TODO: needs fixing
+   if (m_current_block->remaining_slots() > 0) {
+      out_blocks.push_back(m_current_block);
+      return m_current_block->id() + 1;
+   }
+
+   int used_slots = 0;
+   int pending_slots = 0;
+
+   Instr *next_block_start = nullptr;
+   for (auto cur_group : *m_current_block) {
+      /* This limit is a bit fishy, it should be 128 */
+      if (used_slots + pending_slots + cur_group->slots() < 128) {
+         if (cur_group->can_start_alu_block()) {
+            next_block_start = cur_group;
+            used_slots += pending_slots;
+            pending_slots = cur_group->slots();
+         } else {
+            pending_slots += cur_group->slots();
+         }
+      } else {
+         assert(next_block_start);
+         next_block_start->set_instr_flag(Instr::force_cf);
+         used_slots = pending_slots;
+         pending_slots = cur_group->slots();
+      }
+   }
+
+   int sub_block_id = m_current_block->id();
+   Block *sub_block = new Block(m_current_block->nesting_depth(),
+                                sub_block_id++);
+   sub_block->set_type(Block::alu, m_chip_class);
+   sub_block->set_instr_flag(Instr::force_cf);
+
+   for (auto instr : *m_current_block) {
+      auto group = instr->as_alu_group();
+      if (!group) {
+         sub_block->push_back(instr);
+         continue;
+      }
+
+      if (group->group_force_alu_cf()) {
+         assert(!sub_block->lds_group_active());
+         out_blocks.push_back(sub_block);
+         sub_block = new Block(m_current_block->nesting_depth(),
+                               sub_block_id++);
+         sub_block->set_type(Block::alu, m_chip_class);
+         sub_block->set_instr_flag(Instr::force_cf);
+      }
+      sub_block->push_back(group);
+      if (group->has_lds_group_start())
+         sub_block->lds_group_start(*group->begin());
+
+      if (group->has_lds_group_end())
+         sub_block->lds_group_end();
+   }
+   if (!sub_block->empty())
+      out_blocks.push_back(sub_block);
+   return sub_block_id;
+}
+
 template <typename I>
 bool
 BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
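
For illustration, a small driver for the sketch above (the input values are invented for the example): one hundred three-slot groups, every third of which must stay attached to its predecessor, end up split into sub-blocks that each respect the 128-slot budget.

#include <cstdio>

int main()
{
   /* 100 groups of 3 slots each; every third group may not start a
    * block, e.g. because it sits inside an LDS group. */
   std::vector<AluGroup> groups;
   for (int i = 0; i < 100; ++i)
      groups.push_back({3, i % 3 != 2});

   mark_split_points(groups);

   for (const auto& block : split_blocks(groups)) {
      int slots = 0;
      for (const auto& g : block)
         slots += g.slots;
      printf("sub-block: %3zu groups, %3d slots\n", block.size(), slots);
      assert(slots <= 128); /* every sub-block fits one ALU clause */
   }
   return 0;
}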