ir3, tu: Cleanup indirect i/o lowering

author Connor Abbott <cwabbott0@gmail.com>

Thu, 22 Oct 2020 12:58:01 +0000 (14:58 +0200)

committer Marge Bot <eric+marge@anholt.net>

Mon, 26 Apr 2021 17:07:02 +0000 (17:07 +0000)
author Connor Abbott <cwabbott0@gmail.com>
Thu, 22 Oct 2020 12:58:01 +0000 (14:58 +0200)
committer Marge Bot <eric+marge@anholt.net>
Mon, 26 Apr 2021 17:07:02 +0000 (17:07 +0000)
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c

index 284ec29..eb449bb 100644 (file)
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -53,7 +53,6 @@ static const nir_shader_compiler_options options = {
                 .vertex_id_zero_based = true,
                 .lower_extract_byte = true,
                 .lower_extract_word = true,
-               .lower_all_io_to_elements = true,
                 .lower_helper_invocation = true,
                 .lower_bitfield_insert_to_shifts = true,
                 .lower_bitfield_extract_to_shifts = true,
@@ -107,7 +106,6 @@ static const nir_shader_compiler_options options_a6xx = {
                 .vertex_id_zero_based = false,
                 .lower_extract_byte = true,
                 .lower_extract_word = true,
-               .lower_all_io_to_elements = true,
                 .lower_helper_invocation = true,
                 .lower_bitfield_insert_to_shifts = true,
                 .lower_bitfield_extract_to_shifts = true,
@@ -279,6 +277,44 @@ should_split_wrmask(const nir_instr *instr, const void *data)
  }
  
  void
+ir3_nir_lower_io_to_temporaries(nir_shader *s)
+{
+       /* Outputs consumed by the VPC, VS inputs, and FS outputs are all handled
+        * by the hardware pre-loading registers at the beginning and then reading
+        * them at the end, so we can't access them indirectly except through
+        * normal register-indirect accesses, and therefore ir3 doesn't support
+        * indirect accesses on those. Other i/o is lowered in ir3_nir_lower_tess,
+        * and indirects work just fine for those. GS outputs may be consumed by
+        * VPC, but have their own lowering in ir3_nir_lower_gs() which does
+        * something similar to nir_lower_io_to_temporaries so we shouldn't need
+        * to lower them.
+        *
+        * Note: this might be a little inefficient for VS or TES outputs which are
+        * when the next stage isn't an FS, but it probably don't make sense to
+        * depend on the next stage before variant creation.
+        *
+        * TODO: for gallium, mesa/st also does some redundant lowering, including
+        * running this pass for GS inputs/outputs which we don't want but not
+        * including TES outputs or FS inputs which we do need. We should probably
+        * stop doing that once we're sure all drivers are doing their own
+        * indirect i/o lowering.
+        */
+       bool lower_input = s->info.stage == MESA_SHADER_VERTEX || s->info.stage == MESA_SHADER_FRAGMENT;
+       bool lower_output = s->info.stage != MESA_SHADER_TESS_CTRL && s->info.stage != MESA_SHADER_GEOMETRY;
+       if (lower_input || lower_output) {
+               NIR_PASS_V(s, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(s),
+                                 lower_output, lower_input);
+
+               /* nir_lower_io_to_temporaries() creates global variables and copy
+                * instructions which need to be cleaned up.
+                */
+               NIR_PASS_V(s, nir_split_var_copies);
+               NIR_PASS_V(s, nir_lower_var_copies);
+               NIR_PASS_V(s, nir_lower_global_vars_to_local);
+       }
+}
+
+void
  ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
  {
         struct nir_lower_tex_options tex_options = {
@@ -303,8 +339,6 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
         if (s->info.stage == MESA_SHADER_GEOMETRY)
                 NIR_PASS_V(s, ir3_nir_lower_gs);
  
-       NIR_PASS_V(s, nir_lower_io_arrays_to_elements_no_indirects, false);
-
         NIR_PASS_V(s, nir_lower_amul, ir3_glsl_type_size);
  
         OPT_V(s, nir_lower_regs_to_ssa);
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h

index 76eef3b..03e847e 100644 (file)
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -53,6 +53,7 @@ void ir3_nir_lower_gs(nir_shader *shader);
  
  const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
  void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
+void ir3_nir_lower_io_to_temporaries(nir_shader *s);
  void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);
  void ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s);
  void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c

index ef9def7..80414c0 100644 (file)
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -168,8 +168,6 @@ tu_spirv_to_nir(struct tu_device *dev,
  
     NIR_PASS_V(nir, nir_propagate_invariant);
  
-   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
-
     NIR_PASS_V(nir, nir_lower_global_vars_to_local);
     NIR_PASS_V(nir, nir_split_var_copies);
     NIR_PASS_V(nir, nir_lower_var_copies);
@@ -177,16 +175,6 @@ tu_spirv_to_nir(struct tu_device *dev,
     NIR_PASS_V(nir, nir_opt_copy_prop_vars);
     NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
  
-   /* ir3 doesn't support indirect input/output */
-   /* TODO: We shouldn't perform this lowering pass on gl_TessLevelInner
-    * and gl_TessLevelOuter. Since the tess levels are actually stored in
-    * a global BO, they can be directly accessed via stg and ldg.
-    * nir_lower_indirect_derefs will instead generate a big if-ladder which
-    * isn't *incorrect* but is much less efficient. */
-   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
-
-   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
-
     NIR_PASS_V(nir, nir_lower_is_helper_invocation);
  
     NIR_PASS_V(nir, nir_lower_system_values);
@@ -825,6 +813,13 @@ tu_shader_create(struct tu_device *dev,
                   });
     }
  
+   /* This needs to happen before multiview lowering which rewrites store
+    * instructions of the position variable, so that we can just rewrite one
+    * store at the end instead of having to rewrite every store specified by
+    * the user.
+    */
+   ir3_nir_lower_io_to_temporaries(nir);
+
     if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
        tu_nir_lower_multiview(nir, multiview_mask,
                               &shader->multi_pos_output, dev);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c

index 7b7a5ed..f713e2c 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -403,6 +403,7 @@ main(int argc, char **argv)
        return -1;
     }
  
+   ir3_nir_lower_io_to_temporaries(nir);
     ir3_finalize_nir(compiler, nir);
     ir3_nir_post_finalize(compiler, nir);
  
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c

index 65e13ef..ef984a0 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -466,6 +466,7 @@ ir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir, bool optimize)
  {
     struct fd_screen *screen = fd_screen(pscreen);
  
+   ir3_nir_lower_io_to_temporaries(nir);
     ir3_finalize_nir(screen->compiler, nir);
  }
author	Connor Abbott <cwabbott0@gmail.com>
	Thu, 22 Oct 2020 12:58:01 +0000 (14:58 +0200)
committer	Marge Bot <eric+marge@anholt.net>
	Mon, 26 Apr 2021 17:07:02 +0000 (17:07 +0000)
src/freedreno/ir3/ir3_nir.c		patch \| blob \| history
src/freedreno/ir3/ir3_nir.h		patch \| blob \| history
src/freedreno/vulkan/tu_shader.c		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3_cmdline.c		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3_gallium.c		patch \| blob \| history