agx: Fold addressing math into atomics
author    Alyssa Rosenzweig <alyssa@rosenzweig.io>
          Sun, 28 May 2023 01:57:35 +0000 (21:57 -0400)
committer Marge Bot <emma+marge@anholt.net>
          Fri, 9 Jun 2023 12:06:00 +0000 (12:06 +0000)
Like our loads and stores, our global atomics support indexing with a 64-bit
base plus a 32-bit element index, zero- or sign-extended and multiplied by the
word size. Unlike the loads and stores, they do not support additional shifting
(it's not too useful), so that needs an explicit lowering.
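
As a rough model (illustrative C only; agx_atomic_address and its parameters
are made-up names, not driver code), the address a global atomic computes in
hardware looks like this:

   #include <stdbool.h>
   #include <stdint.h>

   /* 64-bit base plus a 32-bit element index, zero- or sign-extended and
    * multiplied by the word size.  Any shift beyond that implicit scaling has
    * to be emitted as explicit ALU, which is what the lowering handles. */
   static uint64_t
   agx_atomic_address(uint64_t base, uint32_t index, bool sign_extend,
                      unsigned word_size)
   {
      int64_t extended =
         sign_extend ? (int64_t)(int32_t)index : (int64_t)index;

      return base + (uint64_t)extended * word_size;
   }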

Switch to using AGX variants of the atomics, running our address pattern
matching on global atomics in order to delete some ALU.
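
In builder terms, the new global-atomic path in agx_nir_lower_address.c boils
down to the sketch below. This is a simplified illustration rather than the
pass itself: it assumes the matcher has already split the address into a
64-bit new_base, a 32-bit offset, a leftover shift, and a sign_extend flag
(those names come from the diff; the fold_global_atomic wrapper is
hypothetical).

   #include "nir_builder.h"

   static nir_ssa_def *
   fold_global_atomic(nir_builder *b, nir_intrinsic_instr *intr,
                      unsigned bit_size, nir_ssa_def *new_base,
                      nir_ssa_def *offset, unsigned shift, bool sign_extend)
   {
      /* Atomics have no shift field, so any shift beyond the implicit
       * word-size multiply becomes an explicit ishl on the 32-bit index. */
      offset = nir_ishl_imm(b, offset, shift);

      /* The zero/sign-extend, word-size scaling, and 64-bit add now happen
       * inside the atomic instead of as separate NIR ALU. */
      return nir_global_atomic_agx(b, bit_size, new_base, offset,
                                   intr->src[1].ssa,
                                   .atomic_op = nir_intrinsic_atomic_op(intr),
                                   .sign_extend = sign_extend);
   }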

This cleans up the image atomic lowering nicely, since we get to take full
advantage of the shift + zero-extend + add on the atomic: the shift comes
from multiplying by the bytes per pixel.
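
For illustration only (plain C arithmetic, not the actual lowering; the
function and the stride_el/bytes_per_pixel names are made up), the address of
a 2D image atomic reduces to:

   #include <stdint.h>

   /* Image atomics operate on 32-bit (r32) texels, so the bytes-per-pixel
    * scale is exactly the atomic's implicit word-size multiply.  With the
    * zero-extend, scale, and 64-bit add folded into the atomic, none of this
    * is left as shader ALU. */
   static uint64_t
   image_atomic_address(uint64_t base, uint32_t x, uint32_t y,
                        uint32_t stride_el, unsigned bytes_per_pixel)
   {
      uint32_t texel = x + y * stride_el;   /* 32-bit element index */

      return base + (uint64_t)texel * bytes_per_pixel;
   }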

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23529>

src/asahi/compiler/agx_compile.c
src/asahi/compiler/agx_nir_lower_address.c

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 76d5724..5e151e7 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -730,12 +730,12 @@ agx_emit_atomic(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr,
       translate_atomic_opcode(nir_intrinsic_atomic_op(instr));
    agx_index base =
       local ? agx_local_base(instr->src[0]) : agx_src_index(&instr->src[0]);
-   agx_index value = agx_src_index(&instr->src[1]);
-   agx_index index = agx_zero(); /* TODO: optimize address arithmetic? */
+   agx_index value = agx_src_index(&instr->src[local ? 1 : 2]);
+   agx_index index = local ? agx_zero() : agx_src_index(&instr->src[1]);
 
    /* cmpxchg (only) takes 2 sources, passed in consecutive registers */
    if (op == AGX_ATOMIC_OPC_CMPXCHG) {
-      agx_index value2 = agx_src_index(&instr->src[2]);
+      agx_index value2 = agx_src_index(&instr->src[local ? 2 : 3]);
       value = agx_vec2(b, value2, value);
    }
 
@@ -827,8 +827,8 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
       agx_emit_local_load(b, dst, instr);
       return NULL;
 
-   case nir_intrinsic_global_atomic:
-   case nir_intrinsic_global_atomic_swap:
+   case nir_intrinsic_global_atomic_agx:
+   case nir_intrinsic_global_atomic_swap_agx:
       agx_emit_atomic(b, dst, instr, false);
       return NULL;
 
diff --git a/src/asahi/compiler/agx_nir_lower_address.c b/src/asahi/compiler/agx_nir_lower_address.c
index a6980ff..a12fcfd 100644
--- a/src/asahi/compiler/agx_nir_lower_address.c
+++ b/src/asahi/compiler/agx_nir_lower_address.c
@@ -246,6 +246,8 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    if (intr->intrinsic != nir_intrinsic_load_global &&
        intr->intrinsic != nir_intrinsic_load_global_constant &&
+       intr->intrinsic != nir_intrinsic_global_atomic &&
+       intr->intrinsic != nir_intrinsic_global_atomic_swap &&
        intr->intrinsic != nir_intrinsic_store_global)
       return false;
 
@@ -309,6 +311,18 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
                                    offset, .access = nir_intrinsic_access(intr),
                                    .base = match.shift, .format = format,
                                    .sign_extend = match.sign_extend);
+   } else if (intr->intrinsic == nir_intrinsic_global_atomic) {
+      offset = nir_ishl_imm(b, offset, match.shift);
+      repl =
+         nir_global_atomic_agx(b, bit_size, new_base, offset, intr->src[1].ssa,
+                               .atomic_op = nir_intrinsic_atomic_op(intr),
+                               .sign_extend = match.sign_extend);
+   } else if (intr->intrinsic == nir_intrinsic_global_atomic_swap) {
+      offset = nir_ishl_imm(b, offset, match.shift);
+      repl = nir_global_atomic_swap_agx(
+         b, bit_size, new_base, offset, intr->src[1].ssa, intr->src[2].ssa,
+         .atomic_op = nir_intrinsic_atomic_op(intr),
+         .sign_extend = match.sign_extend);
    } else {
       nir_store_agx(b, intr->src[0].ssa, new_base, offset,
                     .access = nir_intrinsic_access(intr), .base = match.shift,