From fd6605367d00762e6f63dd6fc85b504ee0c1667a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Fri, 28 May 2021 21:56:13 +0200 Subject: [PATCH] aco: Implement nir_op_sad_u8x4. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fix up the operand size for v_sad instructions, and implement the new NIR horizontal add. There is no viable way to do this in SALU, so let's always use a VGPR destination. Signed-off-by: Timur Kristóf Reviewed-by: Tony Wasserka Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 5 +++++ src/amd/compiler/aco_instruction_selection_setup.cpp | 1 + src/amd/compiler/aco_opcodes.py | 8 ++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index b985b5a..40a1687 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3031,6 +3031,11 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } break; } + case nir_op_sad_u8x4: { + assert(dst.regClass() == v1); + emit_vop3a_instruction(ctx, instr, aco_opcode::v_sad_u8, dst, false, 3u, false); + break; + } case nir_op_fquantize2f16: { Temp src = get_alu_src(ctx, instr->src[0]); Temp f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src); diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 15f9ce3..f7cebe0 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -663,6 +663,7 @@ void init_context(isel_context *ctx, nir_shader *shader) case nir_op_frexp_exp: case nir_op_cube_face_index: case nir_op_cube_face_coord: + case nir_op_sad_u8x4: type = RegType::vgpr; break; case nir_op_f2i16: diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 7a1099d..5267fb1 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -238,8 +238,10 @@ class Opcode(object): self.definition_size = def_dtype_sizes.get(def_dtype, self.operand_size) # exceptions for operands: - if 'sad_' in name: + if 'qsad_' in name: self.operand_size = 0 + elif 'sad_' in name: + self.operand_size = 32 elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']: self.operand_size = 0 elif self.operand_size == 24: @@ -251,8 +253,10 @@ class Opcode(object): self.operand_size = 32 # exceptions for definitions: - if 'sad_' in name: + if 'qsad_' in name: self.definition_size = 0 + elif 'sad_' in name: + self.definition_size = 32 elif '_pk' in name: self.definition_size = 32 -- 2.7.4