r600/sfn: Use a heuristic to keep SSBO setup and store close
author Gert Wollny <gert.wollny@collabora.com>
Tue, 23 Aug 2022 13:46:36 +0000 (15:46 +0200)
committer Marge Bot <emma+marge@anholt.net>
Fri, 26 Aug 2022 08:27:42 +0000 (08:27 +0000)
When SSBO instructions use constant address values, the address loads
are ready immediately. Scheduling these address loads early increases
register pressure, so force a new instruction block to work around
this problem.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6975

Fixes: 79ca456b4837b3bc21cf9ef3c03c505c4b4909f6
   r600/sfn: rewrite NIR backend

v2: do handling in shader block to be thread safe (hinted to by Filip)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Filip Gawin <filip@gawin.net> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18212>

src/gallium/drivers/r600/sfn/sfn_instr.h
src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
src/gallium/drivers/r600/sfn/sfn_instr_mem.h
src/gallium/drivers/r600/sfn/sfn_shader.cpp

index a715305..d3beb8f 100644 (file)
@@ -210,6 +210,8 @@ public:
 
    bool kcache_reservation_failed() const { return m_kcache_alloc_failed;}
 
+   int inc_rat_emitted() { return  ++m_emitted_rat_instr;}
+
    static void set_chipclass(r600_chip_class chip_class);
 
 private:
@@ -234,6 +236,7 @@ private:
    int m_lds_group_requirement{0};
    AluInstr *m_lds_group_start{nullptr};
    static unsigned s_max_kcache_banks;
+   int m_emitted_rat_instr{0};
 };
 
 class InstrWithVectorResult : public Instr {
index 65e52ca..4b0d835 100644 (file)
@@ -559,12 +559,6 @@ bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
 
 bool RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
 {
-
-   /* Forche the scheduler to not move the preparation too far away, by starting
-    * a new block (TODO: better priority handling in the scheduler)*/
-   if (nir_src_num_components(instr->src[0]) > 2)
-      shader.start_new_block(0);
-
    auto &vf = shader.value_factory();
    auto orig_addr = vf.src(instr->src[2], 0);
 
index 0aece6b..9d0132d 100644 (file)
@@ -194,7 +194,6 @@ private:
    int m_element_size{3};
    bool m_need_ack{false};
    bool m_need_mark{false};
-
 };
 
 
index ce2ca43..c0a6856 100644 (file)
@@ -1085,6 +1085,9 @@ void Shader::InstructionChain::visit(RatInstr *instr)
 
    if (prepare_mem_barrier)
       instr->set_ack();
+
+   if (this_shader->m_current_block->inc_rat_emitted() > 15)
+      this_shader->start_new_block(0);
 }
 
 void Shader::InstructionChain::apply(Instr *current, Instr **last) {