r600/sfn: Split ALU blocks in scheduler to fit into 128 slots
author	Gert Wollny <gert.wollny@collabora.com>
Fri, 21 Jul 2023 15:04:46 +0000 (17:04 +0200)
committer	Marge Bot <emma+marge@anholt.net>
Wed, 26 Jul 2023 19:33:43 +0000 (19:33 +0000)
With this change the backend no longer needs to split these ALU CFs.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24297>
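
An ALU clause (CF instruction) on r600-class hardware can address at most 128 instruction slots, so an over-long ALU block must be split into several CFs. The new maybe_split_alu_block below does this in two passes: the first pass walks the scheduled ALU groups and, whenever the slot budget would overflow, tags the most recent group that may legally open a block with Instr::force_cf; the second pass emits a fresh sub-block at every tagged group. The following is a minimal standalone sketch of that strategy; AluGroup, slots, can_start_block, force_cf, mark_split_points and split_blocks are simplified stand-ins invented for illustration, not the driver's actual sfn classes.

#include <cassert>
#include <vector>

/* Simplified stand-in for the driver's AluGroup. */
struct AluGroup {
   int slots;             /* slots this group occupies in the clause */
   bool can_start_block;  /* false e.g. inside an LDS group or while an
                             address register value is live */
   bool force_cf = false; /* marker: a new ALU clause starts here */
};

/* Pass 1: mark split points so that no sub-block exceeds the slot budget.
 * Groups that must not open a block are accounted as "pending" slots so
 * they stay attached to the preceding legal split candidate. Like the
 * version in the commit (note its "TODO: needs fixing"), the candidate is
 * not refreshed in the overflow branch, and the bound is kept at < 128. */
void mark_split_points(std::vector<AluGroup>& groups, int max_slots = 128)
{
   int used_slots = 0;    /* slots committed to the current sub-block */
   int pending_slots = 0; /* slots seen since the last legal candidate */
   AluGroup *next_block_start = nullptr;

   for (auto& g : groups) {
      if (used_slots + pending_slots + g.slots < max_slots) {
         if (g.can_start_block) {
            next_block_start = &g;
            used_slots += pending_slots;
            pending_slots = g.slots;
         } else {
            pending_slots += g.slots;
         }
      } else {
         assert(next_block_start);
         next_block_start->force_cf = true;
         used_slots = pending_slots;
         pending_slots = g.slots;
      }
   }
}

/* Pass 2: emit sub-blocks, starting a new one at every marked group. */
std::vector<std::vector<AluGroup>>
split_blocks(const std::vector<AluGroup>& groups)
{
   std::vector<std::vector<AluGroup>> blocks(1);
   for (auto& g : groups) {
      if (g.force_cf && !blocks.back().empty())
         blocks.emplace_back();
      blocks.back().push_back(g);
   }
   return blocks;
}

Groups that must not open a block, for example the middle of an LDS read/write group or a group that consumes a live address register (cf. the no_lds_or_addr_group flag set in schedule_alu), are accumulated as pending slots so they stay attached to the preceding legal split point.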

src/gallium/drivers/r600/sfn/sfn_scheduler.cpp

index feb2ccc..3767bfd 100644 (file)
@@ -187,6 +187,8 @@ private:
    bool schedule_exports(Shader::ShaderBlocks& out_blocks,
                          std::list<ExportInstr *>& ready_list);
 
+   int maybe_split_alu_block(Shader::ShaderBlocks& out_blocks);
+
    template <typename I> bool schedule(std::list<I *>& ready_list);
 
    template <typename I> bool schedule_block(std::list<I *>& ready_list);
@@ -513,11 +515,17 @@ BlockScheduler::schedule_block(Block& in_block,
 
    if (cir.m_cf_instr) {
      // Assert that the if condition is ready
+      if (m_current_block->type() != Block::alu) {
+         start_new_block(out_blocks, Block::alu);
+      }
       m_current_block->push_back(cir.m_cf_instr);
       cir.m_cf_instr->set_scheduled();
    }
 
-   out_blocks.push_back(m_current_block);
+   if (m_current_block->type() == Block::alu)
+      maybe_split_alu_block(out_blocks);
+   else
+      out_blocks.push_back(m_current_block);
 }
 
 void
@@ -677,6 +685,12 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
    m_idx1_pending |= m_idx1_loading;
    m_idx1_loading = false;
 
+   if (!m_current_block->lds_group_active() &&
+       m_current_block->expected_ar_uses() == 0 &&
+       (!addr || is_index)) {
+      group->set_instr_flag(Instr::no_lds_or_addr_group);
+   }
+
    if (group->has_lds_group_start())
       m_current_block->lds_group_start(*group->begin());
 
@@ -704,7 +718,7 @@ BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
       auto ii = tex_ready.begin();
       sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
 
-      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
+      if ((unsigned)m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
          start_new_block(out_blocks, Block::tex);
 
       for (auto prep : (*ii)->prepare_instr()) {
@@ -749,9 +763,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
    if (!m_current_block->empty()) {
       sfn_log << SfnLog::schedule << "Start new block\n";
       assert(!m_current_block->lds_group_active());
-      out_blocks.push_back(m_current_block);
-      m_current_block =
-         new Block(m_current_block->nesting_depth(), m_current_block->id());
+
+      int next_id = m_current_block->id() + 1;
+      if (m_current_block->type() != Block::alu)
+         out_blocks.push_back(m_current_block);
+      else
+         next_id = maybe_split_alu_block(out_blocks);
+      m_current_block = new Block(m_current_block->nesting_depth(), next_id);
       m_current_block->set_instr_flag(Instr::force_cf);
       m_idx0_pending = m_idx1_pending = false;
 
@@ -759,6 +777,70 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
    m_current_block->set_type(type, m_chip_class);
 }
 
+int BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
+{
+   // TODO: needs fixing
+   if (m_current_block->remaining_slots() > 0) {
+      out_blocks.push_back(m_current_block);
+      return m_current_block->id() + 1;
+   }
+
+   int used_slots = 0;
+   int pending_slots = 0;
+
+   Instr *next_block_start = nullptr;
+   for (auto cur_group : *m_current_block) {
+      /* This limit is a bit fishy, it should be 128 */
+      if (used_slots + pending_slots + cur_group->slots() < 128) {
+         if (cur_group->can_start_alu_block()) {
+            next_block_start = cur_group;
+            used_slots += pending_slots;
+            pending_slots = cur_group->slots();
+         } else {
+            pending_slots += cur_group->slots();
+         }
+      } else {
+         assert(next_block_start);
+         next_block_start->set_instr_flag(Instr::force_cf);
+         used_slots = pending_slots;
+         pending_slots = cur_group->slots();
+      }
+   }
+
+   int sub_block_id = m_current_block->id();
+   Block *sub_block = new Block(m_current_block->nesting_depth(),
+                                sub_block_id++);
+   sub_block->set_type(Block::alu, m_chip_class);
+   sub_block->set_instr_flag(Instr::force_cf);
+
+   for (auto instr : *m_current_block) {
+      auto group = instr->as_alu_group();
+      if (!group) {
+         sub_block->push_back(instr);
+         continue;
+      }
+
+      if (group->group_force_alu_cf()) {
+         assert(!sub_block->lds_group_active());
+         out_blocks.push_back(sub_block);
+         sub_block = new Block(m_current_block->nesting_depth(),
+                               sub_block_id++);
+         sub_block->set_type(Block::alu, m_chip_class);
+         sub_block->set_instr_flag(Instr::force_cf);
+      }
+      sub_block->push_back(group);
+      if (group->has_lds_group_start())
+         sub_block->lds_group_start(*group->begin());
+
+      if (group->has_lds_group_end())
+         sub_block->lds_group_end();
+   }
+   if (!sub_block->empty())
+      out_blocks.push_back(sub_block);
+   return sub_block_id;
+}
+
 template <typename I>
 bool
 BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
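
For illustration, a small driver for the sketch above (the input values are invented for the example): one hundred three-slot groups, every third of which must stay attached to its predecessor, end up split into sub-blocks that each respect the 128-slot budget.

#include <cstdio>

int main()
{
   /* 100 groups of 3 slots each; every third group may not start a
    * block, e.g. because it sits inside an LDS group. */
   std::vector<AluGroup> groups;
   for (int i = 0; i < 100; ++i)
      groups.push_back({3, i % 3 != 2});

   mark_split_points(groups);

   for (const auto& block : split_blocks(groups)) {
      int slots = 0;
      for (const auto& g : block)
         slots += g.slots;
      printf("sub-block: %3zu groups, %3d slots\n", block.size(), slots);
      assert(slots <= 128); /* every sub-block fits one ALU clause */
   }
   return 0;
}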