freedreno/ir3: Add load/store_global lowering
author: Rob Clark <robdclark@chromium.org>
Sun, 22 Aug 2021 16:53:56 +0000 (09:53 -0700)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 21 Oct 2021 18:59:57 +0000 (18:59 +0000)
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13300>

src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_nir_lower_64b.c

index 2f3426e..a630b05 100644 (file)
@@ -705,6 +705,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
    /* Lower scratch writemasks */
    progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s);
 
+   progress |= OPT(s, ir3_nir_lower_64b_global);
    progress |= OPT(s, ir3_nir_lower_64b_intrinsics);
    progress |= OPT(s, ir3_nir_lower_64b_undef);
    progress |= OPT(s, nir_lower_int64);
index 10bc8a9..2de6adf 100644 (file)
@@ -59,6 +59,7 @@ void ir3_nir_lower_gs(nir_shader *shader);
  */
 bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
 bool ir3_nir_lower_64b_undef(nir_shader *shader);
+bool ir3_nir_lower_64b_global(nir_shader *shader);
 
 const nir_shader_compiler_options *
 ir3_get_compiler_options(struct ir3_compiler *compiler);
index 05a2c9d..e02a6df 100644 (file)
@@ -214,3 +214,71 @@ ir3_nir_lower_64b_undef(nir_shader *shader)
          shader, lower_64b_undef_filter,
          lower_64b_undef, NULL);
 }
+
+/*
+ * Lowering for load_global/store_global with 64b addresses to ir3
+ * variants, which instead take a uvec2_32
+ */
+
+static bool
+lower_64b_global_filter(const nir_instr *instr, const void *unused)
+{
+   (void)unused;
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   return (intr->intrinsic == nir_intrinsic_load_global) ||
+          (intr->intrinsic == nir_intrinsic_load_global_constant) ||
+          (intr->intrinsic == nir_intrinsic_store_global);
+}
+
+static nir_ssa_def *
+lower_64b_global(nir_builder *b, nir_instr *instr, void *unused)
+{
+   (void)unused;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   bool load = intr->intrinsic != nir_intrinsic_store_global;
+
+   nir_ssa_def *addr64 = nir_ssa_for_src(b, intr->src[load ? 0 : 1], 1);
+   nir_ssa_def *addr = nir_unpack_64_2x32(b, addr64);
+
+   /*
+    * Note that we can get vec8/vec16 with OpenCL.. we need to split
+    * those up into max 4 components per load/store.
+    */
+
+   if (load) {
+      unsigned num_comp = nir_intrinsic_dest_components(intr);
+      nir_ssa_def *components[num_comp];
+      for (unsigned off = 0; off < num_comp;) {
+         unsigned c = MIN2(num_comp - off, 4);
+         nir_ssa_def *val = nir_build_load_global_ir3(
+               b, c, nir_dest_bit_size(intr->dest),
+               addr, nir_imm_int(b, off));
+         for (unsigned i = 0; i < c; i++) {
+            components[off++] = nir_channel(b, val, i);
+         }
+      }
+      return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components);
+   } else {
+      unsigned num_comp = nir_intrinsic_src_components(intr, 0);
+      nir_ssa_def *value = nir_ssa_for_src(b, intr->src[0], num_comp);
+      for (unsigned off = 0; off < num_comp; off += 4) {
+         unsigned c = MIN2(num_comp - off, 4);
+         nir_ssa_def *v = nir_channels(b, value, BITFIELD_MASK(c) << off);
+         nir_build_store_global_ir3(b, v, addr, nir_imm_int(b, off));
+      }
+      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
+   }
+}
+
+bool
+ir3_nir_lower_64b_global(nir_shader *shader)
+{
+   return nir_shader_lower_instructions(
+         shader, lower_64b_global_filter,
+         lower_64b_global, NULL);
+}