From 6bc42054d118f3980c25b0ca2a94e618502e1475 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 14 Jun 2023 17:37:04 -0400 Subject: [PATCH] asahi: Introduce concept of spilled render targets To accommodate framebuffers which exceed tilebuffer limits, we'll need to spill render targets to main memory. In effect, we need to emulate an immediate-mode renderer for some render targets. This decision is made on a per-render target basis. In our tilebuffer layout calculation, rather than asserting that all render targets fit, introduce a notion of spilling. This doesn't actually implement spilling -- it just pushes the assert failure down to the users. But it's progress. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/agx_meta.c | 4 ++- src/asahi/lib/agx_nir_lower_tilebuffer.c | 8 +++--- src/asahi/lib/agx_tilebuffer.c | 43 +++++++++++++++++++++++++++++--- src/asahi/lib/agx_tilebuffer.h | 34 ++++++++++++++++++++++--- src/asahi/lib/tests/test-tilebuffer.cpp | 16 ++++++------ 5 files changed, 85 insertions(+), 20 deletions(-) diff --git a/src/asahi/lib/agx_meta.c b/src/asahi/lib/agx_meta.c index b21e599..23a7e2f 100644 --- a/src/asahi/lib/agx_meta.c +++ b/src/asahi/lib/agx_meta.c @@ -115,8 +115,10 @@ agx_build_end_of_tile_shader(struct agx_meta_cache *cache, continue; assert(key->op[rt] == AGX_META_OP_STORE); + unsigned offset_B = agx_tilebuffer_offset_B(&key->tib, rt); + nir_block_image_store_agx( - &b, nir_imm_int(&b, rt), nir_imm_intN_t(&b, key->tib.offset_B[rt], 16), + &b, nir_imm_int(&b, rt), nir_imm_intN_t(&b, offset_B, 16), .format = agx_tilebuffer_physical_format(&key->tib, rt), .image_dim = dim); } diff --git a/src/asahi/lib/agx_nir_lower_tilebuffer.c b/src/asahi/lib/agx_nir_lower_tilebuffer.c index d4d5b9d..0e4f8a3 100644 --- a/src/asahi/lib/agx_nir_lower_tilebuffer.c +++ b/src/asahi/lib/agx_nir_lower_tilebuffer.c @@ -96,9 +96,10 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data) value = nir_f2f32(b, value); } + uint8_t offset_B = 
agx_tilebuffer_offset_B(tib, rt); nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16), - .base = tib->offset_B[rt], - .write_mask = write_mask, .format = format); + .base = offset_B, .write_mask = write_mask, + .format = format); return NIR_LOWER_INSTR_PROGRESS_REPLACE; } else { @@ -116,9 +117,10 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data) if (f16) format = PIPE_FORMAT_R16_UINT; + uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt); nir_ssa_def *res = nir_load_local_pixel_agx( b, MIN2(intr->num_components, comps), f16 ? 16 : bit_size, - nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = tib->offset_B[rt], + nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = offset_B, .format = format); /* Extend floats */ diff --git a/src/asahi/lib/agx_tilebuffer.c b/src/asahi/lib/agx_tilebuffer.c index 393c874..c427462 100644 --- a/src/asahi/lib/agx_tilebuffer.c +++ b/src/asahi/lib/agx_tilebuffer.c @@ -6,6 +6,7 @@ #include "agx_tilebuffer.h" #include #include "compiler/agx_internal_formats.h" +#include "util/bitscan.h" #include "util/format/u_format.h" #include "agx_formats.h" #include "agx_usc.h" @@ -15,6 +16,14 @@ */ #define MAX_BYTES_PER_TILE (32768 - 1) +/* Maximum bytes per sample in the tilebuffer. Greater allocations require + * spilling render targets to memory. + */ +#define MAX_BYTES_PER_SAMPLE (64) + +/* Minimum tile size in pixels, architectural. 
*/ +#define MIN_TILE_SIZE_PX (16 * 16) + /* Select the largest tile size that fits */ static struct agx_tile_size agx_select_tile_size(unsigned bytes_per_pixel) @@ -53,19 +62,45 @@ agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs, */ enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt); unsigned align_B = util_format_get_blocksize(physical_fmt); - offset_B = ALIGN_POT(offset_B, align_B); + assert(util_is_power_of_two_nonzero(align_B) && + util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) && + align_B < MAX_BYTES_PER_SAMPLE && + "max bytes per sample divisible by alignment"); - tib.offset_B[rt] = offset_B; + offset_B = ALIGN_POT(offset_B, align_B); + assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above"); + /* Determine the size, if we were to allocate this render target to the + * tilebuffer as desired. + */ unsigned nr = util_format_get_nr_components(physical_fmt) == 1 ? util_format_get_nr_components(formats[rt]) : 1; unsigned size_B = align_B * nr; - offset_B += size_B; + unsigned new_offset_B = offset_B + size_B; + + /* If allocating this render target would exceed any tilebuffer limits, we + * need to spill it to memory. We continue processing in case there are + * smaller render targets after that would still fit. Otherwise, we + * allocate it to the tilebuffer. + * + * TODO: Suboptimal, we might be able to reorder render targets to + * avoid fragmentation causing spilling. + */ + bool fits = + (new_offset_B <= MAX_BYTES_PER_SAMPLE) && + (new_offset_B * MIN_TILE_SIZE_PX * nr_samples) <= MAX_BYTES_PER_TILE; + + if (fits) { + tib._offset_B[rt] = offset_B; + offset_B = new_offset_B; + } else { + tib.spilled[rt] = true; + } } - assert(offset_B <= 64 && "TIB strides must be <= 64"); + assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant"); /* Multisampling needs a nonempty allocation. 
* XXX: Check this against hw diff --git a/src/asahi/lib/agx_tilebuffer.h b/src/asahi/lib/agx_tilebuffer.h index 838fa8a..4895d76 100644 --- a/src/asahi/lib/agx_tilebuffer.h +++ b/src/asahi/lib/agx_tilebuffer.h @@ -6,6 +6,7 @@ #ifndef __AGX_TILEBUFFER_H #define __AGX_TILEBUFFER_H +#include #include #include #include "util/format/u_formats.h" @@ -14,6 +15,11 @@ extern "C" { #endif +/* Maximum render targets per framebuffer. This is NOT architectural, but it + * is the ~universal API limit so there's no point in allowing more. + */ +#define AGX_MAX_RENDER_TARGETS (8) + /* Forward declarations to keep the header lean */ struct nir_shader; struct agx_usc_builder; @@ -27,12 +33,19 @@ struct agx_tilebuffer_layout { /* Logical format of each render target. Use agx_tilebuffer_physical_format * to get the physical format. */ - enum pipe_format logical_format[8]; + enum pipe_format logical_format[AGX_MAX_RENDER_TARGETS]; - /* Offset into the sample of each render target */ - uint8_t offset_B[8]; + /* Which render targets are spilled. */ + bool spilled[AGX_MAX_RENDER_TARGETS]; + + /* Offset into the sample of each render target. If a render target is + * spilled, its offset is UNDEFINED. Use agx_tilebuffer_offset_B to access. + */ + uint8_t _offset_B[AGX_MAX_RENDER_TARGETS]; - /* Total bytes per sample, rounded up as needed */ + /* Total bytes per sample, rounded up as needed. Spilled render targets do + * not count against this. + */ uint8_t sample_size_B; /* Number of samples per pixel */ @@ -42,6 +55,19 @@ struct agx_tilebuffer_layout { struct agx_tile_size tile_size; }; +/* + * _offset_B is undefined for spilled render targets. This safe accessor + * asserts that render targets are not spilled rather than returning garbage. 
+ */ +static inline uint8_t +agx_tilebuffer_offset_B(struct agx_tilebuffer_layout *layout, unsigned rt) +{ + assert(rt < AGX_MAX_RENDER_TARGETS); + assert(!layout->spilled[rt] && "precondition"); + + return layout->_offset_B[rt]; +} + struct agx_tilebuffer_layout agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs, uint8_t nr_samples); diff --git a/src/asahi/lib/tests/test-tilebuffer.cpp b/src/asahi/lib/tests/test-tilebuffer.cpp index 5d0c674..4615006 100644 --- a/src/asahi/lib/tests/test-tilebuffer.cpp +++ b/src/asahi/lib/tests/test-tilebuffer.cpp @@ -23,7 +23,7 @@ struct test tests[] = { 1, { PIPE_FORMAT_R8G8B8A8_UNORM }, { - .offset_B = { 0 }, + ._offset_B = { 0 }, .sample_size_B = 8, .nr_samples = 1, .tile_size = { 32, 32 }, @@ -35,7 +35,7 @@ struct test tests[] = { 2, { PIPE_FORMAT_R8G8B8A8_UNORM }, { - .offset_B = { 0 }, + ._offset_B = { 0 }, .sample_size_B = 8, .nr_samples = 2, .tile_size = { 32, 32 }, @@ -47,7 +47,7 @@ struct test tests[] = { 4, { PIPE_FORMAT_R8G8B8A8_UNORM }, { - .offset_B = { 0 }, + ._offset_B = { 0 }, .sample_size_B = 8, .nr_samples = 4, .tile_size = { 32, 16 }, @@ -64,7 +64,7 @@ struct test tests[] = { PIPE_FORMAT_R32G32_SINT, }, { - .offset_B = { 0, 4, 12, 16 }, + ._offset_B = { 0, 4, 12, 16 }, .sample_size_B = 24, .nr_samples = 1, .tile_size = { 32, 32 }, @@ -81,7 +81,7 @@ struct test tests[] = { PIPE_FORMAT_R32G32_SINT, }, { - .offset_B = { 0, 4, 12, 16 }, + ._offset_B = { 0, 4, 12, 16 }, .sample_size_B = 24, .nr_samples = 2, .tile_size = { 32, 16 }, @@ -98,7 +98,7 @@ struct test tests[] = { PIPE_FORMAT_R32G32_SINT, }, { - .offset_B = { 0, 4, 12, 16 }, + ._offset_B = { 0, 4, 12, 16 }, .sample_size_B = 24, .nr_samples = 4, .tile_size = { 16, 16 }, @@ -110,7 +110,7 @@ struct test tests[] = { 1, { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R16G16_SNORM }, { - .offset_B = { 0, 2 }, + ._offset_B = { 0, 2 }, .sample_size_B = 8, .nr_samples = 1, .tile_size = { 32, 32 }, @@ -122,7 +122,7 @@ struct test tests[] = { 1, { 
PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R10G10B10A2_UNORM }, { - .offset_B = { 0, 4 }, + ._offset_B = { 0, 4 }, .sample_size_B = 8, .nr_samples = 1, .tile_size = { 32, 32 }, -- 2.7.4