nir/lower_locals_to_regs: Add bool bitsize knob
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Thu, 22 Jun 2023 20:12:40 +0000 (16:12 -0400)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Mon, 26 Jun 2023 12:22:06 +0000 (08:22 -0400)
GLSL booleans (and hence bool derefs) may be translated either as 1-bit or
32-bit NIR registers, depending on whether the backend uses
nir_lower_bool_to_int32 or not. Add a knob for this and choose the right type
for different backends.

Fixes nir_validate failure on
dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_bvec3 run under
lavapipe. That test indexes into a bvec3 array, and gallivm first lowers bools
and then lowers derefs to registers, resulting in random 1-bit booleans mixed in
with 32-bit bools.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23804>

src/compiler/nir/nir.h
src/compiler/nir/nir_lower_locals_to_regs.c
src/freedreno/ir3/ir3_context.c
src/gallium/auxiliary/gallivm/lp_bld_nir.c
src/gallium/auxiliary/nir/nir_to_tgsi.c
src/gallium/drivers/r600/sfn/sfn_nir.cpp
src/intel/compiler/brw_nir.c

index 57b8446..b1b52f4 100644 (file)
@@ -4855,7 +4855,7 @@ bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
 bool nir_lower_indirect_var_derefs(nir_shader *shader,
                                    const struct set *vars);
 
-bool nir_lower_locals_to_regs(nir_shader *shader);
+bool nir_lower_locals_to_regs(nir_shader *shader, uint8_t bool_bitsize);
 
 void nir_lower_io_to_temporaries(nir_shader *shader,
                                  nir_function_impl *entrypoint,
index 0982e44..80e7a12 100644 (file)
@@ -30,6 +30,9 @@ struct locals_to_regs_state {
    /* A hash table mapping derefs to registers */
    struct hash_table *regs_table;
 
+   /* Bit size to use for boolean registers */
+   uint8_t bool_bitsize;
+
    bool progress;
 };
 
@@ -118,6 +121,9 @@ get_reg_for_deref(nir_deref_instr *deref, struct locals_to_regs_state *state)
    reg->num_array_elems = array_size > 1 ? array_size : 0;
    reg->bit_size = glsl_get_bit_size(deref->type);
 
+   if (reg->bit_size == 1)
+      reg->bit_size = state->bool_bitsize;
+
    _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
 
    return reg;
@@ -288,13 +294,14 @@ lower_locals_to_regs_block(nir_block *block,
 }
 
 static bool
-nir_lower_locals_to_regs_impl(nir_function_impl *impl)
+nir_lower_locals_to_regs_impl(nir_function_impl *impl, uint8_t bool_bitsize)
 {
    struct locals_to_regs_state state;
 
    nir_builder_init(&state.builder, impl);
    state.progress = false;
    state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal);
+   state.bool_bitsize = bool_bitsize;
 
    nir_metadata_require(impl, nir_metadata_dominance);
 
@@ -311,13 +318,16 @@ nir_lower_locals_to_regs_impl(nir_function_impl *impl)
 }
 
 bool
-nir_lower_locals_to_regs(nir_shader *shader)
+nir_lower_locals_to_regs(nir_shader *shader, uint8_t bool_bitsize)
 {
    bool progress = false;
 
    nir_foreach_function(function, shader) {
-      if (function->impl)
-         progress = nir_lower_locals_to_regs_impl(function->impl) || progress;
+      if (function->impl) {
+         progress =
+            nir_lower_locals_to_regs_impl(function->impl, bool_bitsize) ||
+            progress;
+      }
    }
 
    return progress;
index fcf82a4..e58bedd 100644 (file)
@@ -87,7 +87,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
     */
    bool progress = false;
    bool needs_late_alg = false;
-   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);
+   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
 
    /* we could need cleanup after lower_locals_to_regs */
    while (progress) {
index 2239b0b..2b1a937 100644 (file)
@@ -2795,7 +2795,7 @@ bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
    struct nir_function *func;
 
    nir_convert_from_ssa(nir, true);
-   nir_lower_locals_to_regs(nir);
+   nir_lower_locals_to_regs(nir, 32);
    nir_remove_dead_derefs(nir);
    nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
 
index 947ed28..69cd95b 100644 (file)
@@ -3871,7 +3871,7 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
    NIR_PASS_V(s, nir_lower_vec_to_movs, ntt_vec_to_mov_writemask_cb, NULL);
 
    /* locals_to_regs will leave dead derefs that are good to clean up. */
-   NIR_PASS_V(s, nir_lower_locals_to_regs);
+   NIR_PASS_V(s, nir_lower_locals_to_regs, 32);
    NIR_PASS_V(s, nir_opt_dce);
 
    if (NIR_DEBUG(TGSI)) {
index 0e460de..bd8ed55 100644 (file)
@@ -939,7 +939,7 @@ r600_shader_from_nir(struct r600_context *rctx,
 
    NIR_PASS_V(sh, nir_lower_bool_to_int32);
 
-   NIR_PASS_V(sh, nir_lower_locals_to_regs);
+   NIR_PASS_V(sh, nir_lower_locals_to_regs, 32);
    NIR_PASS_V(sh, nir_convert_from_ssa, true);
    NIR_PASS_V(sh, nir_opt_dce);
 
index 920bd85..a711dcb 100644 (file)
@@ -1701,7 +1701,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
    OPT(nir_copy_prop);
    OPT(nir_opt_dce);
 
-   OPT(nir_lower_locals_to_regs);
+   OPT(nir_lower_locals_to_regs, 32);
 
    if (unlikely(debug_enabled)) {
       /* Re-index SSA defs so we print more sensible numbers. */