r600/sfn: Fix the kcache failure handling

author Gert Wollny <gert.wollny@collabora.com>

Thu, 21 Jul 2022 15:52:48 +0000 (17:52 +0200)

committer Marge Bot <emma+marge@anholt.net>

Sat, 23 Jul 2022 13:10:45 +0000 (13:10 +0000)
author Gert Wollny <gert.wollny@collabora.com>
Thu, 21 Jul 2022 15:52:48 +0000 (17:52 +0200)
committer Marge Bot <emma+marge@anholt.net>
Sat, 23 Jul 2022 13:10:45 +0000 (13:10 +0000)
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.cpp b/src/gallium/drivers/r600/sfn/sfn_instr.cpp

index d81e329..6ab2518 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_instr.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr.cpp
@@ -302,17 +302,43 @@ void Block::push_back(PInst instr)
  
  bool Block::try_reserve_kcache(const AluGroup& group)
  {
+   auto kcache = m_kcache; // work on a copy; m_kcache is only overwritten when all constants fit
+   /* Try to reserve a kcache line for every constant reported by group.get_kconsts(). */
     auto kcache_constants = group.get_kconsts();
     for (auto& kc : kcache_constants)  {
        auto u = kc->as_uniform();
        assert(u);
-      if (!try_reserve_kcache(*u))
+      if (!try_reserve_kcache(*u, kcache)) {
+         m_kcache_alloc_failed = true; // exposed through kcache_reservation_failed()
           return false;
+      }
+   }
+   /* All constants were placed in the copy: commit it and clear the failure flag. */
+   m_kcache = kcache;
+   m_kcache_alloc_failed = false;
+   return true;
+}
+
+bool Block::try_reserve_kcache(const AluInstr& instr)
+{
+   auto kcache = m_kcache; // work on a copy; m_kcache is only overwritten when all sources fit
+   /* Try to reserve a kcache line for every source of instr that is a uniform. */
+   for (auto& src : instr.sources()) {
+      auto u = src->as_uniform();
+      if (u) { // sources for which as_uniform() yields null are skipped
+         if (!try_reserve_kcache(*u, kcache)) {
+            m_kcache_alloc_failed = true; // exposed through kcache_reservation_failed()
+            return false;
+         }
+      }
     }
+   m_kcache = kcache; // all uniform sources were placed: commit the copy
+   m_kcache_alloc_failed = false;
     return true;
  }
  
-bool Block::try_reserve_kcache(const UniformValue& u)
+bool Block::try_reserve_kcache(const UniformValue& u,
+                               std::array<KCacheLine, 4>& kcache) const
  {
     const int kcache_banks = 4; // TODO: handle pre-evergreen
  
@@ -323,49 +349,50 @@ bool Block::try_reserve_kcache(const UniformValue& u)
     bool found = false;
  
     for (int i = 0; i < kcache_banks && !found; ++i) {
-      if (m_kcache[i].mode) {
-         if (m_kcache[i].bank < bank)
+      if (kcache[i].mode) {
+         if (kcache[i].bank < bank)
              continue;
  
-         if ((m_kcache[i].bank == bank &&
-              m_kcache[i].addr > line  + 1) ||
-             m_kcache[i].bank > bank) {
-            if (m_kcache[kcache_banks - 1].mode)
+         if ((kcache[i].bank == bank &&
+              kcache[i].addr > line  + 1) ||
+             kcache[i].bank > bank) {
+            if (kcache[kcache_banks - 1].mode)
                 return false;
  
-            memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
-            m_kcache[i].mode = KCacheLine::lock_1;
-            m_kcache[i].bank = bank;
-            m_kcache[i].addr = line;
+            memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
+            kcache[i].mode = KCacheLine::lock_1;
+            kcache[i].bank = bank;
+            kcache[i].addr = line;
              return true;
           }
  
-         int d = line - m_kcache[i].addr;
+         int d = line - kcache[i].addr;
  
           if (d == -1) {
-            m_kcache[i].addr--;
-            if (m_kcache[i].mode == KCacheLine::lock_2) {
+            kcache[i].addr--;
+            if (kcache[i].mode == KCacheLine::lock_2) {
                 /* we are prepending the line to the current set,
-          * discarding the existing second line,
-          * so we'll have to insert line+2 after it */
+                * discarding the existing second line,
+                * so we'll have to insert line+2 after it */
                 line += 2;
                 continue;
-            } else if (m_kcache[i].mode == KCacheLine::lock_1) {
-               m_kcache[i].mode = KCacheLine::lock_2;
+            } else if (kcache[i].mode == KCacheLine::lock_1) {
+               kcache[i].mode = KCacheLine::lock_2;
                 return true;
              } else {
                 /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
                 return false;
              }
           } else if (d == 1) {
-            m_kcache[i].mode = KCacheLine::lock_2;
+            kcache[i].mode = KCacheLine::lock_2;
              return true;
-         } else if (d == 0)
+         } else if (d == 0) {
              return true;
+         }
        } else { /* free kcache set - use it */
-         m_kcache[i].mode = KCacheLine::lock_1;
-         m_kcache[i].bank = bank;
-         m_kcache[i].addr = line;
+         kcache[i].mode = KCacheLine::lock_1;
+         kcache[i].bank = bank;
+         kcache[i].addr = line;
           return true;
        }
     }
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h

index c70427e..19f1181 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_instr.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instr.h
@@ -196,7 +196,8 @@ public:
     void set_type(Type t);
     uint32_t remaining_slots() const { return m_remaining_slots;}
  
-   bool try_reserve_kcache(const AluGroup& group);
+   bool try_reserve_kcache(const AluGroup& group); /* reserves for all kconsts of the group */
+   bool try_reserve_kcache(const AluInstr& instr); /* reserves for all uniform sources of instr */
  
     auto last_lds_instr() {return m_last_lds_instr;}
     void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
@@ -207,8 +208,11 @@ public:
  
     size_t size() const { return m_instructions.size();}
  
+   bool kcache_reservation_failed() const { return m_kcache_alloc_failed;}
+
  private:
-   bool try_reserve_kcache(const UniformValue& u);
+   bool try_reserve_kcache(const UniformValue& u,
+                           std::array<KCacheLine, 4>& kcache) const;
  
     bool do_ready() const override {return true;};
     void do_print(std::ostream& os) const override;
@@ -221,11 +225,13 @@ private:
     uint32_t m_remaining_slots{0xffff};
  
     std::array<KCacheLine, 4> m_kcache;
+   bool m_kcache_alloc_failed{false};
  
     Instr *m_last_lds_instr{nullptr};
  
     int m_lds_group_requirement{0};
     AluInstr *m_lds_group_start{nullptr};
+
  };
  
  class InstrWithVectorResult : public Instr {
diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp

index faf116a..87ab579 100644 (file)
--- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp
@@ -489,83 +489,84 @@ bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
     bool has_lds_ready = !alu_vec_ready.empty() &&
                          (*alu_vec_ready.begin())->has_lds_access();
  
+   /* If we have ready ALU instructions we have to start a new ALU block */
+   if (has_alu_ready ||  !alu_groups_ready.empty()) {
+      if (m_current_block->type() != Block::alu) {
+         start_new_block(out_blocks, Block::alu);
+         m_alu_groups_schduled = 0;
+      }
+   }
+
     /* Schedule groups first. unless we have a pending LDS instuction
      * We don't want the LDS instructions to be too far apart because the
      * fetch + read from queue has to be in the same ALU CF block */
     if (!alu_groups_ready.empty() && !has_lds_ready) {
        group = *alu_groups_ready.begin();
+      if (!m_current_block->try_reserve_kcache(*group)) {
+         start_new_block(out_blocks, Block::alu);
+         m_current_block->set_instr_flag(Instr::force_cf);
+      }
+
+      if (!m_current_block->try_reserve_kcache(*group))
+         unreachable("Scheduling a group in a new block should always succeed");
        alu_groups_ready.erase(alu_groups_ready.begin());
        sfn_log << SfnLog::schedule << "Schedule ALU group\n";
        success = true;
+   } else if (has_alu_ready) {
+      group = new AluGroup();
+      sfn_log << SfnLog::schedule << "START new ALU group\n";
     } else {
-      if (has_alu_ready) {
-         group = new AluGroup();
-         sfn_log << SfnLog::schedule << "START new ALU group\n";
-      }
+      return false;
     }
  
-   if (group) {
-      int free_slots = group->free_slots();
+   assert(group);
  
-      if (free_slots && has_alu_ready) {
-         if (!alu_vec_ready.empty())
-            success |= schedule_alu_to_group_vec(group);
-
-         /* Apparently one can't schedule a t-slot if there is already
-          * and LDS instruction scheduled.
-          * TODO: check whether this is only relevant for actual LDS instructions
-          * or also for instructions that read from the LDS return value queue */
-
-         if (free_slots & 0x10 && !has_lds_ready) {
-            sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
-            if (!alu_trans_ready.empty())
-               success |= schedule_alu_to_group_trans(group, alu_trans_ready);
-            if (!alu_vec_ready.empty())
-               success |= schedule_alu_to_group_trans(group, alu_vec_ready);
-         }
-      }
+   int free_slots = group->free_slots();
  
-      sfn_log << SfnLog::schedule << "Finalize ALU group\n";
-      group->set_scheduled();
-      group->fix_last_flag();
-      group->set_nesting_depth(m_current_block->nesting_depth());
+   while (free_slots && has_alu_ready) {
+      if (!alu_vec_ready.empty())
+         success |= schedule_alu_to_group_vec(group);
  
+      /* Apparently one can't schedule a t-slot if there is already
+       * an LDS instruction scheduled.
+       * TODO: check whether this is only relevant for actual LDS instructions
+       * or also for instructions that read from the LDS return value queue */
  
-      if (m_current_block->type() != Block::alu) {
-         start_new_block(out_blocks, Block::alu);
-         m_alu_groups_schduled = 0;
+      if (free_slots & 0x10 && !has_lds_ready) {
+         sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
+         if (!alu_trans_ready.empty())
+            success |= schedule_alu_to_group_trans(group, alu_trans_ready);
+         if (!alu_vec_ready.empty())
+            success |= schedule_alu_to_group_trans(group, alu_vec_ready);
        }
  
-      /* Pessimistic hack: If we have started an LDS group,
-       * make sure 8 instructions groups still fit into the CF
-       * TODO: take care of Address slot emission
-       * TODO: maybe do this CF split only in the assembler
-       */
-      /*if (group->slots() > m_current_block->remaining_slots() ||
-          (group->has_lds_group_start() &&
-           m_current_block->remaining_slots() < 7 * 8)) {
-         //assert(!m_current_block->lds_group_active());
-         start_new_block(out_blocks, Block::alu);
-      }*/
-
-      if (!m_current_block->try_reserve_kcache(*group)) {
+      if (success) {
+         ++m_alu_groups_schduled;
+         break;
+      } else if (m_current_block->kcache_reservation_failed()) {
+         // LDS read groups should not lead to impossible
+         // kcache constellations
           assert(!m_current_block->lds_group_active());
+
+         // kcache reservation failed, so we have to start a new CF
           start_new_block(out_blocks, Block::alu);
           m_current_block->set_instr_flag(Instr::force_cf);
+      } else {
+         return false;
        }
+   }
  
-      assert(m_current_block->try_reserve_kcache(*group));
-
-      if (group->has_lds_group_start())
-         m_current_block->lds_group_start(*group->begin());
+   sfn_log << SfnLog::schedule << "Finalize ALU group\n";
+   group->set_scheduled();
+   group->fix_last_flag();
+   group->set_nesting_depth(m_current_block->nesting_depth());
+   m_current_block->push_back(group);
  
-      m_current_block->push_back(group);
-      if (group->has_lds_group_end())
-         m_current_block->lds_group_end();
-   }
+   if (group->has_lds_group_start())
+      m_current_block->lds_group_start(*group->begin());
  
-   if (success)
-      ++m_alu_groups_schduled;
+   if (group->has_lds_group_end())
+      m_current_block->lds_group_end();
  
     return success;
  }
@@ -652,6 +653,13 @@ bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
     auto e = alu_vec_ready.end();
     while (i != e) {
        sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
+
+      if (!m_current_block->try_reserve_kcache(**i)) {
+           sfn_log << SfnLog::schedule << " failed (kcache)\n";
+         ++i;
+         continue;
+      }
+
        if (group->add_vec_instructions(*i)) {
           auto old_i = i;
           ++i;
@@ -679,6 +687,12 @@ bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluIn
     auto e = readylist.end();
     while (i != e) {
        sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
+      if (!m_current_block->try_reserve_kcache(**i)) {
+           sfn_log << SfnLog::schedule << " failed (kcache)\n";
+         ++i;
+         continue;
+      }
+
        if (group->add_trans_instructions(*i)) {
           auto old_i = i;
           ++i;
author	Gert Wollny <gert.wollny@collabora.com>
	Thu, 21 Jul 2022 15:52:48 +0000 (17:52 +0200)
committer	Marge Bot <emma+marge@anholt.net>
	Sat, 23 Jul 2022 13:10:45 +0000 (13:10 +0000)
src/gallium/drivers/r600/sfn/sfn_instr.cpp		patch \| blob \| history
src/gallium/drivers/r600/sfn/sfn_instr.h		patch \| blob \| history
src/gallium/drivers/r600/sfn/sfn_scheduler.cpp		patch \| blob \| history