From a9ed4538abf1247b82adce5cfff9f2bde0b76a05 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <tarceri@itsqueeze.com>
Date: Thu, 29 Jul 2021 19:34:26 +1000
Subject: [PATCH] nir: add indirect loop unrolling to compiler options

The indirect unrolling mask belongs in the compiler options rather than
being passed into the optimisation pass on every call.

It also allows us to call the loop analysis pass, which we will do later
in this series, without having to duplicate these options.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12064>
---
 src/amd/vulkan/radv_shader.c                       |  2 +-
 src/asahi/compiler/agx_compile.c                   |  5 +-
 src/asahi/compiler/agx_compile.h                   |  2 +
 src/broadcom/compiler/nir_to_vir.c                 |  4 +-
 src/broadcom/vulkan/v3dv_pipeline.c                |  1 +
 src/compiler/nir/nir.h                             |  8 +++-
 src/compiler/nir/nir_opt_loop_unroll.c             |  3 +-
 src/freedreno/ir3/ir3_nir.c                        |  3 +-
 src/gallium/auxiliary/nir/nir_to_tgsi.c            |  9 ++--
 src/gallium/auxiliary/nir/tgsi_to_nir.c            |  2 +-
 src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c |  2 +-
 src/gallium/drivers/etnaviv/etnaviv_screen.c       |  1 +
 src/gallium/drivers/freedreno/a2xx/ir2_nir.c       |  3 +-
 src/gallium/drivers/lima/lima_program.c            | 12 ++---
 src/gallium/drivers/radeonsi/si_shader_nir.c       |  2 +-
 src/gallium/drivers/vc4/vc4_program.c              |  6 +--
 src/intel/compiler/brw_compiler.c                  |  6 ++-
 src/intel/compiler/brw_nir.c                       | 55 +---------------------
 src/intel/compiler/brw_shader.h                    | 44 +++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_nir.cpp          |  2 +-
 src/panfrost/bifrost/bifrost_compile.c             |  5 +-
 src/panfrost/bifrost/bifrost_compile.h             |  1 +
 src/panfrost/midgard/midgard_compile.c             |  5 +-
 src/panfrost/midgard/midgard_compile.h             |  1 +
 24 files changed, 90 insertions(+), 94 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c9f4028..82dade3 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -181,7 +181,7 @@ radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
       NIR_PASS(progress, shader, nir_opt_shrink_vectors,
                !device->instance->disable_shrink_image_store);
       if (shader->options->max_unroll_iterations) {
-         NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+         NIR_PASS(progress, shader, nir_opt_loop_unroll);
       }
    } while (progress && !optimize_conservatively);
 
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index e0458ba..1066ed4 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1235,10 +1235,7 @@ agx_optimize_nir(nir_shader *nir)
       NIR_PASS(progress, nir, nir_opt_undef);
       NIR_PASS(progress, nir, nir_lower_undef_to_zero);
 
-      NIR_PASS(progress, nir, nir_opt_loop_unroll,
-               nir_var_shader_in |
-               nir_var_shader_out |
-               nir_var_function_temp);
+      NIR_PASS(progress, nir, nir_opt_loop_unroll);
    } while (progress);
 
    NIR_PASS_V(nir, nir_opt_algebraic_late);
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 6e8b14a..b874ee1 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -213,6 +213,8 @@ static const nir_shader_compiler_options agx_nir_options = {
    .lower_doubles_options = nir_lower_dmod,
    .lower_int64_options = ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
 
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
+
    .has_fsub = true,
    .has_isub = true,
    .has_cs_global_id = true,
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 190d7c3..b0cc497 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1877,9 +1877,7 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
                 if (c && !c->disable_loop_unrolling &&
                     s->options->max_unroll_iterations > 0) {
                        bool local_progress = false;
-                       NIR_PASS(local_progress, s, nir_opt_loop_unroll,
-                                nir_var_shader_in |
-                                nir_var_function_temp);
+                       NIR_PASS(local_progress, s, nir_opt_loop_unroll);
                        c->unrolled_any_loops |= local_progress;
                        progress |= local_progress;
                 }
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 4447e4d..4e692bbf 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -248,6 +248,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
                                    * needs to be supported */
    .lower_interpolate_at = true,
    .max_unroll_iterations = 16,
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
    .divergence_analysis_options =
       nir_divergence_multiple_workgroup_per_compute_subgroup
 };
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 63bdc6f..76ca1f9 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3690,6 +3690,12 @@ typedef struct nir_shader_compiler_options {
     * vectorized IO can pack more varyings when linking. */
    bool linker_ignore_precision;
 
+   /**
+    * Specifies which type of indirectly accessed variables should force
+    * loop unrolling.
+    */
+   nir_variable_mode force_indirect_unrolling;
+
    nir_lower_int64_options lower_int64_options;
    nir_lower_doubles_options lower_doubles_options;
    nir_divergence_options divergence_analysis_options;
@@ -5436,7 +5442,7 @@ bool nir_opt_large_constants(nir_shader *shader,
                              glsl_type_size_align_func size_align,
                              unsigned threshold);
 
-bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+bool nir_opt_loop_unroll(nir_shader *shader);
 
 typedef enum {
     nir_move_const_undef = (1 << 0),
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index c4c7674..3dec5e1 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -1058,10 +1058,11 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl,
  * should force loop unrolling.
  */
 bool
-nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
+nir_opt_loop_unroll(nir_shader *shader)
 {
    bool progress = false;
 
+   nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
    nir_foreach_function(function, shader) {
       if (function->impl) {
          progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2b65167..c0c9a7b 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -132,6 +132,7 @@ static const nir_shader_compiler_options options_a6xx = {
    .has_fsub = true,
    .has_isub = true,
    .max_unroll_iterations = 32,
+   .force_indirect_unrolling = nir_var_all,
    .lower_wpos_pntc = true,
    .lower_cs_local_index_from_id = true,
 
@@ -269,7 +270,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
          OPT(s, nir_opt_dce);
       }
       progress |= OPT(s, nir_opt_if, false);
-      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+      progress |= OPT(s, nir_opt_loop_unroll);
       progress |= OPT(s, nir_opt_remove_phis);
       progress |= OPT(s, nir_opt_undef);
    } while (progress);
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index c8b0b9c..135793d 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2458,7 +2458,6 @@ static void
 ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
 {
    bool progress;
-   nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
    unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
    unsigned control_flow_depth =
       screen->get_shader_param(screen, pipe_stage,
@@ -2492,7 +2491,7 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
       NIR_PASS(progress, s, nir_opt_trivial_continues);
       NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL);
       NIR_PASS(progress, s, nir_opt_undef);
-      NIR_PASS(progress, s, nir_opt_loop_unroll, no_indirects_mask);
+      NIR_PASS(progress, s, nir_opt_loop_unroll);
 
    } while (progress);
 }
@@ -2802,6 +2801,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
       !screen->get_shader_param(screen, pipe_shader_type_from_mesa(s->info.stage),
                                 PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
 
+   nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
+
    if (!options->lower_extract_byte ||
        !options->lower_extract_word ||
        !options->lower_insert_byte ||
@@ -2812,7 +2813,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
        !options->lower_rotate ||
        !options->lower_uniforms_to_ubo ||
        !options->lower_vector_cmp ||
-       options->lower_fsqrt != lower_fsqrt) {
+       options->lower_fsqrt != lower_fsqrt ||
+       options->force_indirect_unrolling != no_indirects_mask) {
       nir_shader_compiler_options *new_options = ralloc(s, nir_shader_compiler_options);
       *new_options = *s->options;
 
@@ -2827,6 +2829,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
       new_options->lower_uniforms_to_ubo = true,
       new_options->lower_vector_cmp = true;
       new_options->lower_fsqrt = lower_fsqrt;
+      new_options->force_indirect_unrolling = no_indirects_mask;
 
       s->options = new_options;
    }
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index cf9a49b..29f110e 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -2444,7 +2444,7 @@ ttn_optimize_nir(nir_shader *nir)
       NIR_PASS(progress, nir, nir_opt_conditional_discard);
 
       if (nir->options->max_unroll_iterations) {
-         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+         NIR_PASS(progress, nir, nir_opt_loop_unroll);
       }
 
    } while (progress);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
index 6d28fee..b1ca2e0 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@@ -164,7 +164,7 @@ etna_optimize_loop(nir_shader *s)
          OPT(s, nir_copy_prop);
          OPT(s, nir_opt_dce);
       }
-      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+      progress |= OPT(s, nir_opt_loop_unroll);
       progress |= OPT(s, nir_opt_if, false);
       progress |= OPT(s, nir_opt_remove_phis);
       progress |= OPT(s, nir_opt_undef);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 82a0e15..45fede5 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -1076,6 +1076,7 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
       .lower_fsqrt = !screen->specs.has_sin_cos_sqrt,
       .lower_sincos = !screen->specs.has_sin_cos_sqrt,
       .lower_uniforms_to_ubo = screen->specs.halti >= 2,
+      .force_indirect_unrolling = nir_var_all,
    };
 
    /* apply debug options that disable individual features */
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 46c7ad7..7433127 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -49,6 +49,7 @@ static const nir_shader_compiler_options options = {
    .has_isub = true,
    .lower_insert_byte = true,
    .lower_insert_word = true,
+   .force_indirect_unrolling = nir_var_all,
 };
 
 const nir_shader_compiler_options *
@@ -92,7 +93,7 @@ ir2_optimize_loop(nir_shader *s)
          OPT(s, nir_copy_prop);
          OPT(s, nir_opt_dce);
       }
-      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+      progress |= OPT(s, nir_opt_loop_unroll);
       progress |= OPT(s, nir_opt_if, false);
       progress |= OPT(s, nir_opt_remove_phis);
       progress |= OPT(s, nir_opt_undef);
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 562586b..e43fa9e 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -61,6 +61,7 @@ static const nir_shader_compiler_options vs_nir_options = {
    .lower_fceil = true,
    .lower_insert_byte = true,
    .lower_insert_word = true,
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 static const nir_shader_compiler_options fs_nir_options = {
@@ -80,6 +81,7 @@ static const nir_shader_compiler_options fs_nir_options = {
    .lower_insert_word = true,
    .lower_bitops = true,
    .lower_vector_cmp = true,
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 const void *
@@ -131,10 +133,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
       NIR_PASS(progress, s, lima_nir_lower_ftrunc);
       NIR_PASS(progress, s, nir_opt_constant_folding);
       NIR_PASS(progress, s, nir_opt_undef);
-      NIR_PASS(progress, s, nir_opt_loop_unroll,
-               nir_var_shader_in |
-               nir_var_shader_out |
-               nir_var_function_temp);
+      NIR_PASS(progress, s, nir_opt_loop_unroll);
    } while (progress);
 
    NIR_PASS_V(s, nir_lower_int_to_float);
@@ -236,10 +235,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
       NIR_PASS(progress, s, nir_opt_algebraic);
       NIR_PASS(progress, s, nir_opt_constant_folding);
       NIR_PASS(progress, s, nir_opt_undef);
-      NIR_PASS(progress, s, nir_opt_loop_unroll,
-               nir_var_shader_in |
-               nir_var_shader_out |
-               nir_var_function_temp);
+      NIR_PASS(progress, s, nir_opt_loop_unroll);
       NIR_PASS(progress, s, lima_nir_split_load_input);
    } while (progress);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5573cc7..044c1ff 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -596,7 +596,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
       NIR_PASS(progress, nir, nir_opt_undef);
       NIR_PASS(progress, nir, nir_opt_conditional_discard);
       if (nir->options->max_unroll_iterations) {
-         NIR_PASS(progress, nir, nir_opt_loop_unroll, 0);
+         NIR_PASS(progress, nir, nir_opt_loop_unroll);
       }
 
       if (nir->info.stage == MESA_SHADER_FRAGMENT)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 0c4ff75..782127f 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1548,10 +1548,7 @@ vc4_optimize_nir(struct nir_shader *s)
                 }
 
                 NIR_PASS(progress, s, nir_opt_undef);
-                NIR_PASS(progress, s, nir_opt_loop_unroll,
-                         nir_var_shader_in |
-                         nir_var_shader_out |
-                         nir_var_function_temp);
+                NIR_PASS(progress, s, nir_opt_loop_unroll);
         } while (progress);
 }
 
@@ -2195,6 +2192,7 @@ static const nir_shader_compiler_options nir_options = {
         .has_fsub = true,
         .has_isub = true,
         .max_unroll_iterations = 32,
+        .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 const void *
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index c1c4f86..4336ff7 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -68,7 +68,8 @@
    .lower_usub_sat64 = true,                                                  \
    .lower_hadd64 = true,                                                      \
    .lower_bfe_with_two_constants = true,                                      \
-   .max_unroll_iterations = 32
+   .max_unroll_iterations = 32,                                               \
+   .force_indirect_unrolling = nir_var_function_temp
 
 static const struct nir_shader_compiler_options scalar_nir_options = {
    COMMON_OPTIONS,
@@ -196,6 +197,9 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
 
       nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
 
+      nir_options->force_indirect_unrolling |=
+         brw_nir_no_indirect_mask(compiler, i);
+
       compiler->glsl_compiler_options[i].NirOptions = nir_options;
 
       compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index dc09dcb..a50a76e 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -520,63 +520,10 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
    this_progress;                                          \
 })
 
-static nir_variable_mode
-brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
-                         gl_shader_stage stage)
-{
-   const struct intel_device_info *devinfo = compiler->devinfo;
-   const bool is_scalar = compiler->scalar_stage[stage];
-   nir_variable_mode indirect_mask = 0;
-
-   switch (stage) {
-   case MESA_SHADER_VERTEX:
-   case MESA_SHADER_FRAGMENT:
-      indirect_mask |= nir_var_shader_in;
-      break;
-
-   case MESA_SHADER_GEOMETRY:
-      if (!is_scalar)
-         indirect_mask |= nir_var_shader_in;
-      break;
-
-   default:
-      /* Everything else can handle indirect inputs */
-      break;
-   }
-
-   if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
-      indirect_mask |= nir_var_shader_out;
-
-   /* On HSW+, we allow indirects in scalar shaders.  They get implemented
-    * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
-    * brw_postprocess_nir.
-    *
-    * We haven't plumbed through the indirect scratch messages on gfx6 or
-    * earlier so doing indirects via scratch doesn't work there. On gfx7 and
-    * earlier the scratch space size is limited to 12kB.  If we allowed
-    * indirects as scratch all the time, we may easily exceed this limit
-    * without having any fallback.
-    */
-   if (is_scalar && devinfo->verx10 <= 70)
-      indirect_mask |= nir_var_function_temp;
-
-   return indirect_mask;
-}
-
 void
 brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
                  bool is_scalar, bool allow_copies)
 {
-   nir_variable_mode loop_indirect_mask =
-      brw_nir_no_indirect_mask(compiler, nir->info.stage);
-
-   /* We can handle indirects via scratch messages.  However, they are
-    * expensive so we'd rather not if we can avoid it.  Have loop unrolling
-    * try to get rid of them.
-    */
-   if (is_scalar)
-      loop_indirect_mask |= nir_var_function_temp;
-
    bool progress;
    unsigned lower_flrp =
       (nir->options->lower_flrp16 ? 16 : 0) |
@@ -671,7 +618,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_opt_if, false);
       OPT(nir_opt_conditional_discard);
       if (nir->options->max_unroll_iterations != 0) {
-         OPT(nir_opt_loop_unroll, loop_indirect_mask);
+         OPT(nir_opt_loop_unroll);
       }
       OPT(nir_opt_remove_phis);
       OPT(nir_opt_gcm, false);
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 20e9281..8d0c9c6 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -127,6 +127,50 @@ brw_get_scratch_size(int size)
    return MAX2(1024, util_next_power_of_two(size));
 }
 
+
+static inline nir_variable_mode
+brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
+                         gl_shader_stage stage)
+{
+   const struct intel_device_info *devinfo = compiler->devinfo;
+   const bool is_scalar = compiler->scalar_stage[stage];
+   nir_variable_mode indirect_mask = (nir_variable_mode) 0;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+   case MESA_SHADER_FRAGMENT:
+      indirect_mask |= nir_var_shader_in;
+      break;
+
+   case MESA_SHADER_GEOMETRY:
+      if (!is_scalar)
+         indirect_mask |= nir_var_shader_in;
+      break;
+
+   default:
+      /* Everything else can handle indirect inputs */
+      break;
+   }
+
+   if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
+      indirect_mask |= nir_var_shader_out;
+
+   /* On HSW+, we allow indirects in scalar shaders.  They get implemented
+    * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
+    * brw_postprocess_nir.
+    *
+    * We haven't plumbed through the indirect scratch messages on gfx6 or
+    * earlier so doing indirects via scratch doesn't work there. On gfx7 and
+    * earlier the scratch space size is limited to 12kB.  If we allowed
+    * indirects as scratch all the time, we may easily exceed this limit
+    * without having any fallback.
+    */
+   if (is_scalar && devinfo->verx10 <= 70)
+      indirect_mask |= nir_var_function_temp;
+
+   return indirect_mask;
+}
+
 bool brw_texture_offset(const nir_tex_instr *tex, unsigned src,
                         uint32_t *offset_bits);
 
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 182644b..caeb195 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir)
       NIR_PASS(progress, nir, nir_opt_undef);
       NIR_PASS(progress, nir, nir_opt_conditional_discard);
       if (nir->options->max_unroll_iterations) {
-         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+         NIR_PASS(progress, nir, nir_opt_loop_unroll);
       }
    } while (progress);
 }
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index afacef2..a8df910 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -3306,10 +3306,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
                 NIR_PASS(progress, nir, nir_opt_undef);
                 NIR_PASS(progress, nir, nir_lower_undef_to_zero);
 
-                NIR_PASS(progress, nir, nir_opt_loop_unroll,
-                         nir_var_shader_in |
-                         nir_var_shader_out |
-                         nir_var_function_temp);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll);
         } while (progress);
 
         /* TODO: Why is 64-bit getting rematerialized?
diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h
index 45bfd52..9b320d7 100644
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h
@@ -93,6 +93,7 @@ static const nir_shader_compiler_options bifrost_nir_options = {
         .vertex_id_zero_based = true,
         .lower_cs_local_index_from_id = true,
         .max_unroll_iterations = 32,
+        .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 #endif
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 183e299..609db50 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -369,10 +369,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
                 NIR_PASS(progress, nir, nir_opt_undef);
                 NIR_PASS(progress, nir, nir_lower_undef_to_zero);
 
-                NIR_PASS(progress, nir, nir_opt_loop_unroll,
-                         nir_var_shader_in |
-                         nir_var_shader_out |
-                         nir_var_function_temp);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll);
 
                 NIR_PASS(progress, nir, nir_opt_vectorize,
                          midgard_vectorize_filter, NULL);
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index d3bce99..65c9bec 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -97,6 +97,7 @@ static const nir_shader_compiler_options midgard_nir_options = {
         .has_cs_global_id = true,
         .lower_cs_local_index_from_id = true,
         .max_unroll_iterations = 32,
+        .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 #endif
-- 
2.7.4