r600/sfn; go back to not lowering uniforms to UBOs
authorGert Wollny <gert.wollny@collabora.com>
Sat, 26 Sep 2020 18:14:00 +0000 (20:14 +0200)
committerMarge Bot <eric+marge@anholt.net>
Mon, 28 Sep 2020 18:16:29 +0000 (18:16 +0000)
Lowering uniforms to UBOs results in an additional iadd for the
UBO buffer id evaluation, and for indirect buffer access that
results in an unnecessary op that can be avoided by not lowering
uniforms. There is some code duplication when reading the uniforms
but it saves a whole instruction group per indirect const buffer
access.

This reverts commit 98eb00face93b9af5aac19008ecff5a2bf376745 with
some additional fixes.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6879>

src/gallium/drivers/r600/r600_pipe_common.c
src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
src/gallium/drivers/r600/sfn/sfn_shader_base.h
src/gallium/drivers/r600/sfn/sfn_value.cpp

index eab7ce9..9dc00cf 100644 (file)
@@ -1201,7 +1201,6 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
        .vectorize_io = true,
        .has_umad24 = true,
        .has_umul24 = true,
-        .lower_uniforms_to_ubo = true
 };
 
 const struct nir_shader_compiler_options r600_nir_options = {
@@ -1224,7 +1223,6 @@ const struct nir_shader_compiler_options r600_nir_options = {
        .vectorize_io = true,
        .has_umad24 = true,
        .has_umul24 = true,
-        .lower_uniforms_to_ubo = true
 };
 
 
index d6ade30..d288b96 100644 (file)
@@ -657,6 +657,8 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
       return emit_load_scratch(instr);
    case nir_intrinsic_store_deref:
       return emit_store_deref(instr);
+   case nir_intrinsic_load_uniform:
+      return load_uniform(instr);
    case nir_intrinsic_discard:
    case nir_intrinsic_discard_if:
       return emit_discard_if(instr);
@@ -868,7 +870,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
          for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
             int cmp = buf_cmp + i;
             assert(cmp < 4);
-            auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, bufid->u32));
+            auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, bufid->u32 + 1));
             if (instr->dest.is_ssa)
                load_preloaded_value(instr->dest, i, u);
             else {
@@ -881,7 +883,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
          return true;
 
       } else {
-         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32);
+         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32 + 1);
       }
    } else {
       if (buf_offset) {
@@ -915,7 +917,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
       }
 
       auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
-                                     0, bufid, bim_zero);
+                                     1, bufid, bim_zero);
       ir->set_dest_swizzle(swz);
 
       emit_instruction(ir);
@@ -952,6 +954,46 @@ bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
    return do_emit_load_deref(var, instr);
 }
 
+bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
+{
+   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+                 << *reinterpret_cast<nir_instr*>(instr)
+                 << "'\n";
+
+
+   /* If the target register is an SSA register and the load is not
+    * indirect, then we can do lazy loading, i.e. the uniform value can
+    * be used directly. Otherwise we have to load the data for real
+    * right away.
+    */
+   auto literal = nir_src_as_const_value(instr->src[0]);
+   int base = nir_intrinsic_base(instr);
+
+   if (literal) {
+      AluInstruction *ir = nullptr;
+
+      for (int i = 0; i < instr->num_components ; ++i) {
+         PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
+         sfn_log << SfnLog::io << "uniform "
+                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
+
+         if (instr->dest.is_ssa)
+            load_preloaded_value(instr->dest, i, u);
+         else {
+            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
+                                                   u, {alu_write});
+             emit_instruction(ir);
+         }
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+   } else {
+      PValue addr = from_nir(instr->src[0], 0, 0);
+      return load_uniform_indirect(instr, addr, 16 * base, 0);
+   }
+   return true;
+}
+
 bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid)
 {
    if (!addr) {
@@ -963,7 +1005,7 @@ bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, P
    std::array<int, 4> swz = {7,7,7,7};
    for (int i = 0; i < 4; ++i) {
       trgt.set_reg_i(i, from_nir(instr->dest, i));
-      swz[i] = i + nir_intrinsic_component(instr);
+      swz[i] = i;
    }
 
    if (addr->type() != Value::gpr) {
@@ -971,7 +1013,6 @@ bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, P
       addr = trgt.reg_i(0);
    }
 
-   /* FIXME: buffer index and index mode are not set correctly */
    auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest,
                                   bufferid, PValue(), bim_none);
    ir->set_dest_swizzle(swz);
index 309493f..50a0b9e 100644 (file)
@@ -166,6 +166,7 @@ private:
 
    bool emit_store_deref(nir_intrinsic_instr* instr);
 
+   bool load_uniform(nir_intrinsic_instr* instr);
    bool process_uniforms(nir_variable *uniform);
    bool process_inputs(nir_variable *input);
    bool process_outputs(nir_variable *output);
index 3a5a3ce..cdd1367 100644 (file)
@@ -210,7 +210,7 @@ UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank):
 UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr):
    Value(Value::kconst, chan),
    m_index(sel),
-   m_kcache_bank(0),
+   m_kcache_bank(1),
    m_addr(addr)
 {