From 30dec8b414ef6113ba36726e3e72915a7b7b5288 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 22 Jun 2021 10:55:04 +0200 Subject: [PATCH] broadcom/compiler: implement nir_intrinsic_load_subgroup_id correctly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit For some reason, this was implemented with the bulk of the compute shader enablement, but this intrinsic is specific to subgroups and thus was not really used. Also, its implementation was not correct, since it was returning the element index within the subgroup, not the subgroup index itself, which is the index of the batch in the dispatch. Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/nir_to_vir.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index fced8e1..de1cc9e 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2773,6 +2773,13 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) } } +static inline struct qreg +emit_load_local_invocation_index(struct v3d_compile *c) +{ + return vir_SHR(c, c->cs_payload[1], + vir_uniform_ui(c, 32 - c->local_invocation_index_bits)); +} + static void ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) { @@ -3034,12 +3041,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) } break; - case nir_intrinsic_load_local_invocation_index: - ntq_store_dest(c, &instr->dest, 0, - vir_SHR(c, c->cs_payload[1], - vir_uniform_ui(c, 32 - c->local_invocation_index_bits))); - break; - case nir_intrinsic_load_workgroup_id: { struct qreg x = vir_AND(c, c->cs_payload[0], vir_uniform_ui(c, 0xffff)); @@ -3066,10 +3067,24 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; } - case nir_intrinsic_load_subgroup_id: - ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c)); + case 
nir_intrinsic_load_local_invocation_index: + ntq_store_dest(c, &instr->dest, 0, + emit_load_local_invocation_index(c)); break; + case nir_intrinsic_load_subgroup_id: { + /* This is basically the batch index, which is the Local + * Invocation Index divided by the SIMD width. + */ + STATIC_ASSERT(util_is_power_of_two_nonzero(V3D_CHANNELS)); + const uint32_t divide_shift = ffs(V3D_CHANNELS) - 1; + struct qreg lii = emit_load_local_invocation_index(c); + ntq_store_dest(c, &instr->dest, 0, + vir_SHR(c, lii, + vir_uniform_ui(c, divide_shift))); + break; + } + case nir_intrinsic_load_per_vertex_input: { /* The vertex shader writes all its used outputs into * consecutive VPM offsets, so if any output component is -- 2.7.4