r600/sfn: Add experimental support for load/store_global
author Gert Wollny <gert.wollny@collabora.com>
Fri, 9 Dec 2022 16:02:37 +0000 (17:02 +0100)
committer Gert Wollny <gert.wollny@collabora.com>
Fri, 9 Jun 2023 06:49:49 +0000 (08:49 +0200)
This is needed for rusticl, but the results may be unexpected.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20252>

src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp
src/gallium/drivers/r600/sfn/sfn_instr_mem.h
src/gallium/drivers/r600/sfn/sfn_shader.cpp
src/gallium/drivers/r600/sfn/sfn_shader.h

index 60892cb..f07255c 100644 (file)
@@ -511,6 +511,8 @@ RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
    case nir_intrinsic_ssbo_atomic:
    case nir_intrinsic_ssbo_atomic_swap:
       return emit_ssbo_atomic_op(intr, shader);
+   case nir_intrinsic_store_global:
+      return emit_global_store(intr, shader);
    case nir_intrinsic_image_store:
       return emit_image_store(intr, shader);
    case nir_intrinsic_image_load:
@@ -569,6 +571,50 @@ RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
 }
 
 bool
+RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader)
+{ // Lowers a store_global intrinsic: shift the address, copy the data into a temp vec4, emit a RAT STORE_RAW; always returns true.
+   auto& vf = shader.value_factory();
+   auto addr_orig = vf.src(intr->src[1], 0); // address operand: component 0 of src[1]
+   auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7}); // temporary vec4; only element 0 is written below
+   // Shift the address right by two bits (presumably byte address -> 32-bit word index; TODO confirm).
+   shader.emit_instruction(
+      new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2),
+                   AluInstr::last_write));
+   // Build the value swizzle: channel 0 is always selected, channels 1..3 only when their write-mask bit is set.
+   RegisterVec4::Swizzle value_swz = {0,7,7,7}; // NOTE(review): 7 appears to mark an unused channel -- confirm
+   auto mask = nir_intrinsic_write_mask(intr);
+   for (int i = 0; i < 4; ++i) {
+      if (mask & (1 << i))
+         value_swz[i] = i;
+   }
+
+   auto value_vec = vf.temp_vec4(pin_chgr, value_swz);
+   // Copy each selected component of src[0] into the temporary; entries with swizzle >= 4 are skipped.
+   AluInstr *ir = nullptr; // tracks the most recently emitted mov
+   for (int i = 0; i < 4; ++i) {
+      if (value_swz[i] < 4) {
+         ir = new AluInstr(op1_mov, value_vec[i],
+                           vf.src(intr->src[0], i), AluInstr::write);
+         shader.emit_instruction(ir);
+      }
+   }
+   if (ir) // set alu_last_instr on the final mov emitted by the loop above
+      ir->set_alu_flag(alu_last_instr);
+   // Emit the store itself, using the copied value and the shifted address.
+   auto store = new RatInstr(cf_mem_rat_cacheless,
+                             RatInstr::STORE_RAW,
+                             value_vec,
+                             addr_vec,
+                             shader.ssbo_image_offset(),
+                             nullptr,
+                             1, // NOTE(review): meaning of the trailing 1 / mask / 0 arguments is set by the RatInstr constructor, not visible here
+                             mask,
+                             0);
+   shader.emit_instruction(store);
+   return true;
+}
+
+bool
 RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
 {
    auto& vf = shader.value_factory();
index d2e1264..65c2b4b 100644 (file)
@@ -167,6 +167,8 @@ public:
    static bool emit(nir_intrinsic_instr *intr, Shader& shader);
 
 private:
+   static bool emit_global_store(nir_intrinsic_instr *intr, Shader& shader); // handles nir_intrinsic_store_global, dispatched from emit()
+
    static bool emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader);
    static bool emit_ssbo_store(nir_intrinsic_instr *intr, Shader& shader);
    static bool emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader);
index 49bdf12..a48980a 100644 (file)
@@ -903,6 +903,8 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr)
       return emit_load_scratch(intr);
    case nir_intrinsic_store_local_shared_r600:
       return emit_local_store(intr);
+   case nir_intrinsic_load_global:
+      return emit_load_global(intr);
    case nir_intrinsic_load_local_shared_r600:
       return emit_local_load(intr);
    case nir_intrinsic_load_tcs_in_param_base_r600:
@@ -1130,6 +1132,25 @@ Shader::emit_load_scratch(nir_intrinsic_instr *intr)
    return true;
 }
 
+bool Shader::emit_load_global(nir_intrinsic_instr *intr)
+{
+   auto dest = value_factory().dest_vec4(intr->dest, pin_group);
+
+   auto src_value = value_factory().src(intr->src[0], 0);
+   auto src = src_value->as_register();
+   if (!src) {
+      src = value_factory().temp_register();
+      emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::last_write));
+   }
+   auto load = new LoadFromBuffer(dest, {0,7,7,7}, src, 0, 1, NULL, fmt_32);
+   load->set_mfc(4);
+   load->set_num_format(vtx_nf_int);
+   load->reset_fetch_flag(FetchInstr::format_comp_signed);
+
+   emit_instruction(load);
+   return true;
+}
+
 bool
 Shader::emit_local_store(nir_intrinsic_instr *instr)
 {
index c873244..1634380 100644 (file)
@@ -314,6 +314,7 @@ private:
    bool emit_control_flow(ControlFlowInstr::CFType type);
    bool emit_store_scratch(nir_intrinsic_instr *intr);
    bool emit_load_scratch(nir_intrinsic_instr *intr);
+   bool emit_load_global(nir_intrinsic_instr *intr); // handles nir_intrinsic_load_global, dispatched from process_intrinsic()
    bool emit_local_store(nir_intrinsic_instr *intr);
    bool emit_local_load(nir_intrinsic_instr *instr);
    bool emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset);