From b768a254f799d8da7e445b6f20bfa1e371681b33 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date: Sat, 25 Feb 2023 22:29:42 -0500
Subject: [PATCH] agx: Use nir_lower_mem_access_bit_sizes

Lowers away 64-bit loads, which we'll create in the sysval lowering for
dynamically indexed UBOs/VBOs. The lowering generates pack_64_2x32 instructions,
so lower those too.

No shader-db changes.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21674>
---
 src/asahi/compiler/agx_compile.c | 30 ++++++++++++++++++++++++++++++
 src/asahi/compiler/agx_compile.h |  1 +
 2 files changed, 31 insertions(+)

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 1e2711f..99d89af 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -2094,6 +2094,27 @@ agx_fp32_varying_mask(nir_shader *nir)
    return mask;
 }
 
+/* Widest 8/16/32-bit access, at most vec4, allowed by size and alignment */
+static nir_mem_access_size_align
+mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, uint32_t align,
+                         uint32_t align_offset, bool offset_is_const,
+                         const void *cb_data)
+{
+   align = nir_combined_align(align, align_offset);
+   assert(util_is_power_of_two_nonzero(align));
+   unsigned bit_size = (bytes & 1) ? 8 : (bytes & 2) ? 16 : 32;
+   if (align == 2)
+      bit_size = MIN2(bit_size, 16);
+   else if (align == 1)
+      bit_size = 8;
+   /* NOTE(review): assumes the pass splits requests wider than a vec4 */
+   return (nir_mem_access_size_align){
+      .num_components = MIN2(bytes / (bit_size / 8), 4),
+      .bit_size = bit_size,
+      .align = bit_size / 8,
+   };
+}
+
 static bool
 agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit)
 {
@@ -2354,6 +2375,15 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
          out->depth_layout = layout;
    }
 
+   /* Late sysval lowering creates large loads. Load lowering creates packs. */
+   NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes,
+              nir_var_mem_ssbo | nir_var_mem_constant |
+                 nir_var_mem_task_payload | nir_var_shader_temp |
+                 nir_var_function_temp | nir_var_mem_global |
+                 nir_var_mem_shared,
+              mem_access_size_align_cb, NULL);
+   NIR_PASS_V(nir, nir_lower_pack);
+
    /* Late blend lowering creates vectors */
    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 0ee8f43..f0f5f93 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -204,6 +204,7 @@ static const nir_shader_compiler_options agx_nir_options = {
    .lower_fdph = true,
    .lower_ffract = true,
    .lower_pack_half_2x16 = true,
+   .lower_pack_64_2x32 = true,
    .lower_unpack_half_2x16 = true,
    .lower_extract_byte = true,
    .lower_insert_byte = true,
-- 
2.7.4