From a7114f3f46fc6e54ee0458b39e45619f9e277c6b Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 1 Sep 2020 17:39:35 +0100
Subject: [PATCH] nir/opt_uniform_atomics: don't optimize atomics twice
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Applications sometimes already do this optimization themselves.

fossil-db (Navi):
Totals from 51 (0.04% of 135946) affected shaders:
CodeSize: 507484 -> 501860 (-1.11%)
Instrs: 99635 -> 98471 (-1.17%)
Cycles: 2421944 -> 2414780 (-0.30%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/compiler/nir/nir_opt_uniform_atomics.c | 76 ++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/src/compiler/nir/nir_opt_uniform_atomics.c b/src/compiler/nir/nir_opt_uniform_atomics.c
index 91abf67..96b3edd 100644
--- a/src/compiler/nir/nir_opt_uniform_atomics.c
+++ b/src/compiler/nir/nir_opt_uniform_atomics.c
@@ -82,6 +82,79 @@ parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src)
    }
 }
 
+/* Returns a bitmask of invocation indices that are compared against a subgroup
+ * uniform value.
+ */
+static unsigned
+match_invocation_comparison(nir_ssa_scalar scalar)
+{
+   /* Non-ALU scalars (e.g. the result of elect()) are handled below. */
+   bool is_alu = nir_ssa_scalar_is_alu(scalar);
+
+   if (is_alu && nir_ssa_scalar_alu_op(scalar) == nir_op_iand) {
+      return match_invocation_comparison(nir_ssa_scalar_chase_alu_src(scalar, 0)) |
+             match_invocation_comparison(nir_ssa_scalar_chase_alu_src(scalar, 1));
+   } else if (is_alu && nir_ssa_scalar_alu_op(scalar) == nir_op_ieq) {
+      unsigned dims = 0;
+      for (unsigned i = 0; i < 2; i++) {
+         nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(scalar, i);
+         if (src.def->parent_instr->type != nir_instr_type_intrinsic)
+            continue;
+         if (nir_ssa_scalar_chase_alu_src(scalar, !i).def->divergent)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src.def->parent_instr);
+         if (intrin->intrinsic == nir_intrinsic_load_subgroup_invocation)
+            dims = 0x8;
+         else if (intrin->intrinsic == nir_intrinsic_load_local_invocation_index)
+            dims = 0x7;
+         else if (intrin->intrinsic == nir_intrinsic_load_local_invocation_id)
+            dims = 1 << src.comp;
+         else if (intrin->intrinsic == nir_intrinsic_load_global_invocation_index)
+            dims = 0x7;
+         else if (intrin->intrinsic == nir_intrinsic_load_global_invocation_id)
+            dims = 1 << src.comp;
+      }
+
+      return dims;
+   } else if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr);
+      if (intrin->intrinsic == nir_intrinsic_elect)
+         return 0x8;
+      return 0;
+   } else {
+      return 0;
+   }
+}
+
+/* Returns true if the intrinsic is already conditional so that at most one
+ * invocation in the subgroup does the atomic.
+ */
+static bool
+is_atomic_already_optimized(nir_shader *shader, nir_intrinsic_instr *instr)
+{
+   unsigned dims = 0;
+   for (nir_cf_node *cf = &instr->instr.block->cf_node; cf; cf = cf->parent) {
+      if (cf->type == nir_cf_node_if) {
+         nir_block *first_then = nir_if_first_then_block(nir_cf_node_as_if(cf));
+         nir_block *last_then = nir_if_last_then_block(nir_cf_node_as_if(cf));
+         bool within_then = instr->instr.block->index >= first_then->index;
+         within_then = within_then && instr->instr.block->index <= last_then->index;
+         if (!within_then)
+            continue;
+
+         nir_ssa_scalar cond = {nir_cf_node_as_if(cf)->condition.ssa, 0};
+         dims |= match_invocation_comparison(cond);
+      }
+   }
+
+   unsigned dims_needed = 0;
+   for (unsigned i = 0; i < 3; i++)
+      dims_needed |= (shader->info.cs.local_size[i] > 1) << i;
+
+   return (dims & dims_needed) == dims_needed || dims & 0x8;
+}
+
 static nir_ssa_def *
 emit_scalar_intrinsic(nir_builder *b, nir_intrinsic_op op, unsigned bit_size)
 {
@@ -228,6 +301,9 @@ opt_uniform_atomics(nir_function_impl *impl)
          if (nir_src_is_divergent(intrin->src[offset_src]))
            continue;
 
+         if (is_atomic_already_optimized(b.shader, intrin))
+            continue;
+
          b.cursor = nir_before_instr(instr);
          optimize_and_rewrite_atomic(&b, intrin);
          progress = true;
-- 
2.7.4
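
For illustration only (this shader is not part of the patch), a compute shader that already does the optimization itself, of the kind the commit message refers to, might look like the following sketch. It assumes the standard GL_KHR_shader_subgroup builtins; the buffer names and layout are hypothetical.

    #version 460
    #extension GL_KHR_shader_subgroup_basic : require
    #extension GL_KHR_shader_subgroup_arithmetic : require

    layout(local_size_x = 64) in;

    layout(std430, binding = 0) buffer Counters { uint total; };
    layout(std430, binding = 1) readonly buffer Inputs { uint values[]; };

    void main()
    {
        // Per-subgroup reduction done by the application itself.
        uint sum = subgroupAdd(values[gl_GlobalInvocationID.x]);

        // Only one invocation per subgroup performs the atomic. In NIR this
        // becomes an if whose condition match_invocation_comparison() maps to
        // the 0x8 (subgroup) bit, so is_atomic_already_optimized() returns
        // true and the pass skips the atomic instead of rewriting it again.
        if (subgroupElect())
            atomicAdd(total, sum);
    }

A guard like "if (gl_SubgroupInvocationID == 0)" or "if (gl_LocalInvocationIndex == 0)" would be recognized the same way, via the 0x8 bit or the per-dimension bits checked against the workgroup size.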