From 16f8bfb042cf5d0f41654805eda6502f6d205845 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date: Sun, 26 Feb 2023 23:33:02 -0500
Subject: [PATCH] agx: Don't set lower_pack_split

We should handle nir_op_unpack_32_2x16_split_* natively, since we can generate
better code with agx_subdivide (coalescing the ops away) than the bitshift
lowering.

That said, we do need some extra instructions for the floating point
conversions.

No shader-db changes (which makes sense because we're targeting the GLES3.0
shader-db, which doesn't have the packing GLSL functions).

The real motivation of this change isn't optimizing some GLSL pack functions,
though; it's avoiding a code regression from using NIR's memory bit size
lowering in a future MR. That lowering will turn things like "load i16vec4" into
"load i32vec2 + unpack_32_2x16", so we need to be able to coalesce that unpack.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21674>
---
 src/asahi/compiler/agx_compile.c        |  3 +++
 src/asahi/compiler/agx_compile.h        |  1 -
 src/asahi/compiler/agx_nir_algebraic.py | 10 +++++++++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 303ea7b..06328db 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1250,15 +1250,18 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
       return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE);
    }
 
+   case nir_op_pack_32_2x16_split:
    case nir_op_pack_64_2x32_split: {
       agx_index idx[] = {s0, s1};
       return agx_emit_collect_to(b, dst, 2, idx);
    }
 
    case nir_op_unpack_64_2x32_split_x:
+   case nir_op_unpack_32_2x16_split_x:
       return agx_subdivide_to(b, dst, s0, 0);
 
    case nir_op_unpack_64_2x32_split_y:
+   case nir_op_unpack_32_2x16_split_y:
       return agx_subdivide_to(b, dst, s0, 1);
 
    case nir_op_vec2:
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 162fa6f..5702006 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -205,7 +205,6 @@ static const nir_shader_compiler_options agx_nir_options = {
    .lower_ffract = true,
    .lower_pack_half_2x16 = true,
    .lower_unpack_half_2x16 = true,
-   .lower_pack_split = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
    .lower_insert_byte = true,
diff --git a/src/asahi/compiler/agx_nir_algebraic.py b/src/asahi/compiler/agx_nir_algebraic.py
index 2195f43..cf60f13 100644
--- a/src/asahi/compiler/agx_nir_algebraic.py
+++ b/src/asahi/compiler/agx_nir_algebraic.py
@@ -21,6 +21,14 @@ for s in [8, 16, 32, 64]:
         lower_sm5_shift += [((shift, f'a@{s}', b),
                              (shift, a, ('iand', b, s - 1)))]
 
+lower_half_pack = [
+    (('pack_half_2x16_split', a, b),
+     ('pack_32_2x16_split', ('f2f16', a), ('f2f16', b))),
+
+    (('unpack_half_2x16_split_x', a), ('f2f32', ('unpack_32_2x16_split_x', a))),
+    (('unpack_half_2x16_split_y', a), ('f2f32', ('unpack_32_2x16_split_y', a))),
+]
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('-p', '--import-path', required=True)
@@ -34,7 +42,7 @@ def run():
     print('#include "agx_nir.h"')
 
     print(nir_algebraic.AlgebraicPass("agx_nir_lower_algebraic_late",
-                                      lower_sm5_shift).render())
+                                      lower_sm5_shift + lower_half_pack).render())
 
 
 if __name__ == '__main__':
-- 
2.7.4