From 53341e44ade08e6bcac58b0814ecb3db950da69f Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 22 Jun 2021 11:04:06 +0200 Subject: [PATCH] broadcom/compiler: implement more subgroup intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/nir_to_vir.c | 51 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 6f3d2c5..ce8948a 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3241,6 +3241,57 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; } + case nir_intrinsic_load_subgroup_size: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform_ui(c, V3D_CHANNELS)); + break; + + case nir_intrinsic_load_subgroup_invocation: + ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c)); + break; + + case nir_intrinsic_elect: { + /* Flafirst reads A flags so we need to make sure it reflects + * currently active lanes. + * + * MSF returns 0 for disabled lanes in compute shaders so + * PUSHZ will set A=1 for disabled lanes. We want the inverse + * of this but we don't have any means to negate the A flags + * directly, but we can do it by repeating the same operation + * with NORZ (A = ~A & ~Z). + */ + assert(c->s->info.stage == MESA_SHADER_COMPUTE); + vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ); + vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ); + + /* If we are under non-uniform control flow we also need to + * AND the A flags with the current execute mask. + */ + if (vir_in_nonuniform_control_flow(c)) { + const uint32_t bidx = c->cur_block->index; + vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(), + c->execute, + vir_uniform_ui(c, bidx)), + V3D_QPU_UF_ANDZ); + } + + struct qreg first = vir_FLAFIRST(c); + + /* Produce a boolean result from Flafirst */ + vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(), + first, vir_uniform_ui(c, 1)), + V3D_QPU_PF_PUSHZ); + struct qreg result = + vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA, + vir_uniform_ui(c, ~0), + vir_uniform_ui(c, 0))); + c->flags_temp = result.index; + c->flags_cond = V3D_QPU_COND_IFA; + + ntq_store_dest(c, &instr->dest, 0, result); + break; + } + case nir_intrinsic_load_num_subgroups: unreachable("Should have been lowered"); break; -- 2.7.4