From 6bc42054d118f3980c25b0ca2a94e618502e1475 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date: Wed, 14 Jun 2023 17:37:04 -0400
Subject: [PATCH] asahi: Introduce concept of spilled render targets

To accommodate framebuffers which exceed tilebuffer limits, we'll need to spill
render targets to main memory. In effect, we need to emulate an immediate-mode
renderer for some render targets. This decision is made on a per-render target
basis. In our tilebuffer layout calculation, rather than asserting that all
render targets fit, introduce a notion of spilling.

This doesn't actually implement spilling -- it just pushes the assert failure
down to the users. But it's progress.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24258>
---
 src/asahi/lib/agx_meta.c                 |  4 ++-
 src/asahi/lib/agx_nir_lower_tilebuffer.c |  8 +++---
 src/asahi/lib/agx_tilebuffer.c           | 43 +++++++++++++++++++++++++++++---
 src/asahi/lib/agx_tilebuffer.h           | 34 ++++++++++++++++++++++---
 src/asahi/lib/tests/test-tilebuffer.cpp  | 16 ++++++------
 5 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/src/asahi/lib/agx_meta.c b/src/asahi/lib/agx_meta.c
index b21e599..23a7e2f 100644
--- a/src/asahi/lib/agx_meta.c
+++ b/src/asahi/lib/agx_meta.c
@@ -115,8 +115,10 @@ agx_build_end_of_tile_shader(struct agx_meta_cache *cache,
          continue;
 
       assert(key->op[rt] == AGX_META_OP_STORE);
+      unsigned offset_B = agx_tilebuffer_offset_B(&key->tib, rt);
+
       nir_block_image_store_agx(
-         &b, nir_imm_int(&b, rt), nir_imm_intN_t(&b, key->tib.offset_B[rt], 16),
+         &b, nir_imm_int(&b, rt), nir_imm_intN_t(&b, offset_B, 16),
          .format = agx_tilebuffer_physical_format(&key->tib, rt),
          .image_dim = dim);
    }
diff --git a/src/asahi/lib/agx_nir_lower_tilebuffer.c b/src/asahi/lib/agx_nir_lower_tilebuffer.c
index d4d5b9d..0e4f8a3 100644
--- a/src/asahi/lib/agx_nir_lower_tilebuffer.c
+++ b/src/asahi/lib/agx_nir_lower_tilebuffer.c
@@ -96,9 +96,10 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
             value = nir_f2f32(b, value);
       }
 
+      uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
       nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
-                                .base = tib->offset_B[rt],
-                                .write_mask = write_mask, .format = format);
+                                .base = offset_B, .write_mask = write_mask,
+                                .format = format);
 
       return NIR_LOWER_INSTR_PROGRESS_REPLACE;
    } else {
@@ -116,9 +117,10 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
       if (f16)
          format = PIPE_FORMAT_R16_UINT;
 
+      uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
       nir_ssa_def *res = nir_load_local_pixel_agx(
          b, MIN2(intr->num_components, comps), f16 ? 16 : bit_size,
-         nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = tib->offset_B[rt],
+         nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = offset_B,
          .format = format);
 
       /* Extend floats */
diff --git a/src/asahi/lib/agx_tilebuffer.c b/src/asahi/lib/agx_tilebuffer.c
index 393c874..c427462 100644
--- a/src/asahi/lib/agx_tilebuffer.c
+++ b/src/asahi/lib/agx_tilebuffer.c
@@ -6,6 +6,7 @@
 #include "agx_tilebuffer.h"
 #include <assert.h>
 #include "compiler/agx_internal_formats.h"
+#include "util/bitscan.h"
 #include "util/format/u_format.h"
 #include "agx_formats.h"
 #include "agx_usc.h"
@@ -15,6 +16,14 @@
  */
 #define MAX_BYTES_PER_TILE (32768 - 1)
 
+/* Maximum bytes per sample in the tilebuffer. Greater allocations require
+ * spilling render targets to memory.
+ */
+#define MAX_BYTES_PER_SAMPLE (64)
+
+/* Minimum tile size in pixels, architectural. */
+#define MIN_TILE_SIZE_PX (16 * 16)
+
 /* Select the largest tile size that fits */
 static struct agx_tile_size
 agx_select_tile_size(unsigned bytes_per_pixel)
@@ -53,19 +62,45 @@ agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
        */
       enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
       unsigned align_B = util_format_get_blocksize(physical_fmt);
-      offset_B = ALIGN_POT(offset_B, align_B);
+      assert(util_is_power_of_two_nonzero(align_B) &&
+             util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
+             align_B < MAX_BYTES_PER_SAMPLE &&
+             "max bytes per sample divisible by alignment");
 
-      tib.offset_B[rt] = offset_B;
+      offset_B = ALIGN_POT(offset_B, align_B);
+      assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");
 
+      /* Determine the size, if we were to allocate this render target to the
+       * tilebuffer as desired.
+       */
       unsigned nr = util_format_get_nr_components(physical_fmt) == 1
                        ? util_format_get_nr_components(formats[rt])
                        : 1;
 
       unsigned size_B = align_B * nr;
-      offset_B += size_B;
+      unsigned new_offset_B = offset_B + size_B;
+
+      /* If allocating this render target would exceed any tilebuffer limits, we
+       * need to spill it to memory. We continue processing in case there are
+       * smaller render targets after that would still fit. Otherwise, we
+       * allocate it to the tilebuffer.
+       *
+       * TODO: Suboptimal, we might be able to reorder render targets to
+       * avoid fragmentation causing spilling.
+       */
+      bool fits =
+         (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
+         (new_offset_B * MIN_TILE_SIZE_PX * nr_samples) <= MAX_BYTES_PER_TILE;
+
+      if (fits) {
+         tib._offset_B[rt] = offset_B;
+         offset_B = new_offset_B;
+      } else {
+         tib.spilled[rt] = true;
+      }
    }
 
-   assert(offset_B <= 64 && "TIB strides must be <= 64");
+   assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");
 
    /* Multisampling needs a nonempty allocation.
     * XXX: Check this against hw
diff --git a/src/asahi/lib/agx_tilebuffer.h b/src/asahi/lib/agx_tilebuffer.h
index 838fa8a..4895d76 100644
--- a/src/asahi/lib/agx_tilebuffer.h
+++ b/src/asahi/lib/agx_tilebuffer.h
@@ -6,6 +6,7 @@
 #ifndef __AGX_TILEBUFFER_H
 #define __AGX_TILEBUFFER_H
 
+#include <assert.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include "util/format/u_formats.h"
@@ -14,6 +15,11 @@
 extern "C" {
 #endif
 
+/* Maximum render targets per framebuffer. This is NOT architectural, but it
+ * is the ~universal API limit so there's no point in allowing more.
+ */
+#define AGX_MAX_RENDER_TARGETS (8)
+
 /* Forward declarations to keep the header lean */
 struct nir_shader;
 struct agx_usc_builder;
@@ -27,12 +33,19 @@ struct agx_tilebuffer_layout {
    /* Logical format of each render target. Use agx_tilebuffer_physical_format
     * to get the physical format.
     */
-   enum pipe_format logical_format[8];
+   enum pipe_format logical_format[AGX_MAX_RENDER_TARGETS];
 
-   /* Offset into the sample of each render target */
-   uint8_t offset_B[8];
+   /* Which render targets are spilled. */
+   bool spilled[AGX_MAX_RENDER_TARGETS];
+
+   /* Offset into the sample of each render target. If a render target is
+    * spilled, its offset is UNDEFINED. Use agx_tilebuffer_offset_B to access.
+    */
+   uint8_t _offset_B[AGX_MAX_RENDER_TARGETS];
 
-   /* Total bytes per sample, rounded up as needed */
+   /* Total bytes per sample, rounded up as needed. Spilled render targets do
+    * not count against this.
+    */
    uint8_t sample_size_B;
 
    /* Number of samples per pixel */
@@ -42,6 +55,19 @@ struct agx_tilebuffer_layout {
    struct agx_tile_size tile_size;
 };
 
+/*
+ * _offset_B is undefined for spilled render targets. This safe accessor
+ * asserts that render targets are not spilled rather than returning garbage.
+ */
+static inline uint8_t
+agx_tilebuffer_offset_B(struct agx_tilebuffer_layout *layout, unsigned rt)
+{
+   assert(rt < AGX_MAX_RENDER_TARGETS);
+   assert(!layout->spilled[rt] && "precondition");
+
+   return layout->_offset_B[rt];
+}
+
 struct agx_tilebuffer_layout
 agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
                             uint8_t nr_samples);
diff --git a/src/asahi/lib/tests/test-tilebuffer.cpp b/src/asahi/lib/tests/test-tilebuffer.cpp
index 5d0c674..4615006 100644
--- a/src/asahi/lib/tests/test-tilebuffer.cpp
+++ b/src/asahi/lib/tests/test-tilebuffer.cpp
@@ -23,7 +23,7 @@ struct test tests[] = {
       1,
       { PIPE_FORMAT_R8G8B8A8_UNORM },
       {
-         .offset_B = { 0 },
+         ._offset_B = { 0 },
          .sample_size_B = 8,
          .nr_samples = 1,
          .tile_size = { 32, 32 },
@@ -35,7 +35,7 @@ struct test tests[] = {
       2,
       { PIPE_FORMAT_R8G8B8A8_UNORM },
       {
-         .offset_B = { 0 },
+         ._offset_B = { 0 },
          .sample_size_B = 8,
          .nr_samples = 2,
          .tile_size = { 32, 32 },
@@ -47,7 +47,7 @@ struct test tests[] = {
       4,
       { PIPE_FORMAT_R8G8B8A8_UNORM },
       {
-         .offset_B = { 0 },
+         ._offset_B = { 0 },
          .sample_size_B = 8,
          .nr_samples = 4,
          .tile_size = { 32, 16 },
@@ -64,7 +64,7 @@ struct test tests[] = {
          PIPE_FORMAT_R32G32_SINT,
       },
       {
-         .offset_B = { 0, 4, 12, 16 },
+         ._offset_B = { 0, 4, 12, 16 },
          .sample_size_B = 24,
          .nr_samples = 1,
          .tile_size = { 32, 32 },
@@ -81,7 +81,7 @@ struct test tests[] = {
          PIPE_FORMAT_R32G32_SINT,
       },
       {
-         .offset_B = { 0, 4, 12, 16 },
+         ._offset_B = { 0, 4, 12, 16 },
          .sample_size_B = 24,
          .nr_samples = 2,
          .tile_size = { 32, 16 },
@@ -98,7 +98,7 @@ struct test tests[] = {
          PIPE_FORMAT_R32G32_SINT,
       },
       {
-         .offset_B = { 0, 4, 12, 16 },
+         ._offset_B = { 0, 4, 12, 16 },
          .sample_size_B = 24,
          .nr_samples = 4,
          .tile_size = { 16, 16 },
@@ -110,7 +110,7 @@ struct test tests[] = {
       1,
       { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R16G16_SNORM },
       {
-         .offset_B = { 0, 2 },
+         ._offset_B = { 0, 2 },
          .sample_size_B = 8,
          .nr_samples = 1,
          .tile_size = { 32, 32 },
@@ -122,7 +122,7 @@ struct test tests[] = {
       1,
       { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R10G10B10A2_UNORM },
       {
-         .offset_B = { 0, 4 },
+         ._offset_B = { 0, 4 },
          .sample_size_B = 8,
          .nr_samples = 1,
          .tile_size = { 32, 32 },
-- 
2.7.4