From 8384189b6c9a98624ac6c5fbeb1ac3eef5ebf0b5 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Fri, 13 May 2022 13:39:01 +0100
Subject: [PATCH] aco: use p_parallelcopy for uniform reduction with zero source
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

I think v_mov_b32 was only used because a sub-dword p_parallelcopy couldn't
take constants on some gfx levels. That shouldn't be the case anymore.

Signed-off-by: Rhys Perry
Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 5b337f7..51285cf 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7725,10 +7725,8 @@ emit_addition_uniform_reduce(isel_context* ctx, nir_op op, Definition dst, nir_s
          bld.pseudo(aco_opcode::p_extract_vector, dst, count, Operand::zero());
       else if (nir_src_as_uint(src) == 1)
          bld.copy(dst, count);
-      else if (nir_src_as_uint(src) == 0 && dst.bytes() <= 2)
-         bld.vop1(aco_opcode::v_mov_b32, dst, Operand::zero()); /* RA will use SDWA if possible */
       else if (nir_src_as_uint(src) == 0)
-         bld.copy(dst, Operand::zero());
+         bld.copy(dst, Operand::zero(dst.bytes()));
       else if (count.type() == RegType::vgpr)
          bld.v_mul_imm(dst, count, nir_src_as_uint(src));
       else
-- 
2.7.4