From 4684425150bdd7907fbbc37caa01a01551bb2cc6 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Fri, 3 Dec 2021 03:04:19 -0500
Subject: [PATCH] freedreno/ir3: no need to count bits 16b at a time for a4xx

This also works out nicely since a4xx has some sort of problem with the
16b-based lowering.

Signed-off-by: Ilia Mirkin
Part-of:
---
 src/freedreno/ir3/ir3_compiler_nir.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index c6bd72c..228cfbd 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -769,9 +769,13 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
       break;
    }
    case nir_op_bit_count: {
-      // TODO, we need to do this 16b at a time on a5xx+a6xx.. need to
-      // double check on earlier gen's. Once half-precision support is
-      // in place, this should probably move to a NIR lowering pass:
+      if (ctx->compiler->gen < 5) {
+         dst[0] = ir3_CBITS_B(b, src[0], 0);
+         break;
+      }
+
+      // We need to do this 16b at a time on a5xx+a6xx. Once half-precision
+      // support is in place, this should probably move to a NIR lowering pass:
       struct ir3_instruction *hi, *lo;
 
       hi = ir3_COV(b, ir3_SHR_B(b, src[0], 0, create_immed(b, 16), 0), TYPE_U32,
-- 
2.7.4