From 16f8bfb042cf5d0f41654805eda6502f6d205845 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 26 Feb 2023 23:33:02 -0500 Subject: [PATCH] agx: Don't set lower_pack_split We should handle nir_op_unpack_32_2x16_split_* natively, since we can generate better code with agx_subdivide (coalescing the ops away) than the bitshift lowering. That said, we do need some extra instructions for the floating point conversions. No shader-db changes (which makes sense because we're targeting the GLES3.0 shader-db, which doesn't have the packing GLSL functions). The real motivation of this change isn't optimizing some GLSL pack functions, though, it's avoiding a code regression from using NIR's memory bit size lowering in a future MR. That lowering will turn things like "load i16vec4" into "load i32vec2 + unpack_32_2x16", so we need to be able to coalesce that unpack. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.c | 3 +++ src/asahi/compiler/agx_compile.h | 1 - src/asahi/compiler/agx_nir_algebraic.py | 10 +++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 303ea7b..06328db 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1250,15 +1250,18 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr) return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE); } + case nir_op_pack_32_2x16_split: case nir_op_pack_64_2x32_split: { agx_index idx[] = {s0, s1}; return agx_emit_collect_to(b, dst, 2, idx); } case nir_op_unpack_64_2x32_split_x: + case nir_op_unpack_32_2x16_split_x: return agx_subdivide_to(b, dst, s0, 0); case nir_op_unpack_64_2x32_split_y: + case nir_op_unpack_32_2x16_split_y: return agx_subdivide_to(b, dst, s0, 1); case nir_op_vec2: diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 162fa6f..5702006 100644 --- a/src/asahi/compiler/agx_compile.h +++ 
b/src/asahi/compiler/agx_compile.h @@ -205,7 +205,6 @@ static const nir_shader_compiler_options agx_nir_options = { .lower_ffract = true, .lower_pack_half_2x16 = true, .lower_unpack_half_2x16 = true, - .lower_pack_split = true, .lower_extract_byte = true, .lower_extract_word = true, .lower_insert_byte = true, diff --git a/src/asahi/compiler/agx_nir_algebraic.py b/src/asahi/compiler/agx_nir_algebraic.py index 2195f43..cf60f13 100644 --- a/src/asahi/compiler/agx_nir_algebraic.py +++ b/src/asahi/compiler/agx_nir_algebraic.py @@ -21,6 +21,14 @@ for s in [8, 16, 32, 64]: lower_sm5_shift += [((shift, f'a@{s}', b), (shift, a, ('iand', b, s - 1)))] +lower_half_pack = [ + (('pack_half_2x16_split', a, b), + ('pack_32_2x16_split', ('f2f16', a), ('f2f16', b))), + + (('unpack_half_2x16_split_x', a), ('f2f32', ('unpack_32_2x16_split_x', a))), + (('unpack_half_2x16_split_y', a), ('f2f32', ('unpack_32_2x16_split_y', a))), +] + def main(): parser = argparse.ArgumentParser() parser.add_argument('-p', '--import-path', required=True) @@ -34,7 +42,7 @@ def run(): print('#include "agx_nir.h"') print(nir_algebraic.AlgebraicPass("agx_nir_lower_algebraic_late", - lower_sm5_shift).render()) + lower_sm5_shift + lower_half_pack).render()) if __name__ == '__main__': -- 2.7.4