From b91616e80049d8de911cda06874d1d26255bafe8 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 26 Sep 2023 18:29:31 +0200
Subject: [PATCH] aco: implement 64bit div find_lsb

This can be selected for divergent subgroupBallotFindLSB.

Reviewed-by: Rhys Perry
Cc: mesa-stable
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index d92bcc0..287669f 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1753,6 +1753,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          emit_vop1_instruction(ctx, instr, aco_opcode::v_ffbl_b32, dst);
       } else if (src.regClass() == s2) {
          bld.sop1(aco_opcode::s_ff1_i32_b64, Definition(dst), src);
+      } else if (src.regClass() == v2) {
+         Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
+         bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
+         lo = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), lo);
+         hi = bld.vop1(aco_opcode::v_ffbl_b32, bld.def(v1), hi);
+         hi = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)), hi);
+         bld.vop2(aco_opcode::v_min_u32, Definition(dst), lo, hi);
       } else {
          isel_err(&instr->instr, "Unimplemented NIR instr bit size");
       }
-- 
2.7.4