From a9ad04f17d9f2c902141d3a362e2993ac9ce3ab8 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 22 Jun 2021 12:00:55 +0200 Subject: [PATCH] broadcom/compiler: lower nir_intrinsic_load_num_subgroups MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The number of subgroups is the local workgroup size divided by the dispatch width. Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/nir_to_vir.c | 4 +++ src/broadcom/compiler/vir.c | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index de1cc9e..6f3d2c5 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3241,6 +3241,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; } + case nir_intrinsic_load_num_subgroups: + unreachable("Should have been lowered"); + break; + default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index e1d174c..1b35ed9 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -25,6 +25,7 @@ #include "v3d_compiler.h" #include "util/u_prim.h" #include "compiler/nir/nir_schedule.h" +#include "compiler/nir/nir_builder.h" int vir_get_nsrc(struct qinst *inst) @@ -1351,6 +1352,72 @@ v3d_nir_sort_constant_ubo_loads(nir_shader *s, struct v3d_compile *c) } static void +lower_load_num_subgroups(struct v3d_compile *c, + nir_builder *b, + nir_intrinsic_instr *intr) +{ + assert(c->s->info.stage == MESA_SHADER_COMPUTE); + assert(intr->intrinsic == nir_intrinsic_load_num_subgroups); + + b->cursor = nir_after_instr(&intr->instr); + uint32_t num_subgroups = + DIV_ROUND_UP(c->s->info.workgroup_size[0] * + c->s->info.workgroup_size[1] * + c->s->info.workgroup_size[2], V3D_CHANNELS); + nir_ssa_def *result = nir_imm_int(b, num_subgroups); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, result); + nir_instr_remove(&intr->instr); +} + +static bool +lower_subgroup_intrinsics(struct v3d_compile *c, + nir_block *block, nir_builder *b) +{ + bool progress = false; + nir_foreach_instr_safe(inst, block) { + if (inst->type != nir_instr_type_intrinsic) + continue;; + + nir_intrinsic_instr *intr = + nir_instr_as_intrinsic(inst); + if (!intr) + continue; + + switch (intr->intrinsic) { + case nir_intrinsic_load_num_subgroups: { + lower_load_num_subgroups(c, b, intr); + progress = true; + break; + } + default: + break; + } + } + + return progress; +} + +static bool +v3d_nir_lower_subgroup_intrinsics(nir_shader *s, struct v3d_compile *c) +{ + bool progress = false; + nir_foreach_function(function, s) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) + progress |= lower_subgroup_intrinsics(c, block, &b); + + nir_metadata_preserve(function->impl, + nir_metadata_block_index | + nir_metadata_dominance); + } + } + return progress; +} + +static void v3d_attempt_compile(struct v3d_compile *c) { switch (c->s->info.stage) { @@ -1422,6 +1489,8 @@ v3d_attempt_compile(struct v3d_compile *c) NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s); + NIR_PASS_V(c->s, v3d_nir_lower_subgroup_intrinsics, c); + v3d_optimize_nir(c, c->s); /* Do late algebraic optimization to turn add(a, neg(b)) back into -- 2.7.4