continue;
assert(key->op[rt] == AGX_META_OP_STORE);
+ unsigned offset_B = agx_tilebuffer_offset_B(&key->tib, rt);
+
nir_block_image_store_agx(
- &b, nir_imm_int(&b, rt), nir_imm_intN_t(&b, key->tib.offset_B[rt], 16),
+ &b, nir_imm_int(&b, rt), nir_imm_intN_t(&b, offset_B, 16),
.format = agx_tilebuffer_physical_format(&key->tib, rt),
.image_dim = dim);
}
value = nir_f2f32(b, value);
}
+ uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
- .base = tib->offset_B[rt],
- .write_mask = write_mask, .format = format);
+ .base = offset_B, .write_mask = write_mask,
+ .format = format);
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
} else {
if (f16)
format = PIPE_FORMAT_R16_UINT;
+ uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
nir_ssa_def *res = nir_load_local_pixel_agx(
b, MIN2(intr->num_components, comps), f16 ? 16 : bit_size,
- nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = tib->offset_B[rt],
+ nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = offset_B,
.format = format);
/* Extend floats */
#include "agx_tilebuffer.h"
#include <assert.h>
#include "compiler/agx_internal_formats.h"
+#include "util/bitscan.h"
#include "util/format/u_format.h"
#include "agx_formats.h"
#include "agx_usc.h"
*/
#define MAX_BYTES_PER_TILE (32768 - 1)
+/* Maximum bytes per sample in the tilebuffer. Greater allocations require
+ * spilling render targets to memory.
+ */
+#define MAX_BYTES_PER_SAMPLE (64)
+
+/* Minimum tile size in pixels, architectural. */
+#define MIN_TILE_SIZE_PX (16 * 16)
+
/* Select the largest tile size that fits */
static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)
*/
enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
unsigned align_B = util_format_get_blocksize(physical_fmt);
- offset_B = ALIGN_POT(offset_B, align_B);
+ assert(util_is_power_of_two_nonzero(align_B) &&
+ util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
+ align_B < MAX_BYTES_PER_SAMPLE &&
+ "max bytes per sample divisible by alignment");
- tib.offset_B[rt] = offset_B;
+ offset_B = ALIGN_POT(offset_B, align_B);
+ assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");
+ /* Determine the size, if we were to allocate this render target to the
+ * tilebuffer as desired.
+ */
unsigned nr = util_format_get_nr_components(physical_fmt) == 1
? util_format_get_nr_components(formats[rt])
: 1;
unsigned size_B = align_B * nr;
- offset_B += size_B;
+ unsigned new_offset_B = offset_B + size_B;
+
+ /* If allocating this render target would exceed any tilebuffer limits, we
+ * need to spill it to memory. We continue processing in case there are
+ * smaller render targets after that would still fit. Otherwise, we
+ * allocate it to the tilebuffer.
+ *
+ * TODO: Suboptimal, we might be able to reorder render targets to
+ * avoid fragmentation causing spilling.
+ */
+ bool fits =
+ (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
+ (new_offset_B * MIN_TILE_SIZE_PX * nr_samples) <= MAX_BYTES_PER_TILE;
+
+ if (fits) {
+ tib._offset_B[rt] = offset_B;
+ offset_B = new_offset_B;
+ } else {
+ tib.spilled[rt] = true;
+ }
}
- assert(offset_B <= 64 && "TIB strides must be <= 64");
+ assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");
/* Multisampling needs a nonempty allocation.
* XXX: Check this against hw
#ifndef __AGX_TILEBUFFER_H
#define __AGX_TILEBUFFER_H
+#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "util/format/u_formats.h"
extern "C" {
#endif
+/* Maximum render targets per framebuffer. This is NOT architectural, but it
+ * is the ~universal API limit so there's no point in allowing more.
+ */
+#define AGX_MAX_RENDER_TARGETS (8)
+
/* Forward declarations to keep the header lean */
struct nir_shader;
struct agx_usc_builder;
/* Logical format of each render target. Use agx_tilebuffer_physical_format
* to get the physical format.
*/
- enum pipe_format logical_format[8];
+ enum pipe_format logical_format[AGX_MAX_RENDER_TARGETS];
- /* Offset into the sample of each render target */
- uint8_t offset_B[8];
+ /* Which render targets are spilled. */
+ bool spilled[AGX_MAX_RENDER_TARGETS];
+
+ /* Offset into the sample of each render target. If a render target is
+ * spilled, its offset is UNDEFINED. Use agx_tilebuffer_offset_B to access.
+ */
+ uint8_t _offset_B[AGX_MAX_RENDER_TARGETS];
- /* Total bytes per sample, rounded up as needed */
+ /* Total bytes per sample, rounded up as needed. Spilled render targets do
+ * not count against this.
+ */
uint8_t sample_size_B;
/* Number of samples per pixel */
struct agx_tile_size tile_size;
};
+/*
+ * _offset_B is undefined for spilled render targets. This safe accessor
+ * asserts that render targets are not spilled rather than returning garbage.
+ */
+static inline uint8_t
+agx_tilebuffer_offset_B(const struct agx_tilebuffer_layout *layout,
+                        unsigned rt)
+{
+   /* Bounds-check the index before it is used on either per-render-target
+    * array (spilled[] and _offset_B[] are both AGX_MAX_RENDER_TARGETS long).
+    */
+   assert(rt < AGX_MAX_RENDER_TARGETS);
+
+   /* The layout documents _offset_B[rt] as undefined once spilled[rt] is set,
+    * so asking for the offset of a spilled render target is a caller bug.
+    */
+   assert(!layout->spilled[rt] && "precondition");
+
+   /* The layout is only read, hence the const-qualified parameter. */
+   return layout->_offset_B[rt];
+}
+
struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
uint8_t nr_samples);
1,
{ PIPE_FORMAT_R8G8B8A8_UNORM },
{
- .offset_B = { 0 },
+ ._offset_B = { 0 },
.sample_size_B = 8,
.nr_samples = 1,
.tile_size = { 32, 32 },
2,
{ PIPE_FORMAT_R8G8B8A8_UNORM },
{
- .offset_B = { 0 },
+ ._offset_B = { 0 },
.sample_size_B = 8,
.nr_samples = 2,
.tile_size = { 32, 32 },
4,
{ PIPE_FORMAT_R8G8B8A8_UNORM },
{
- .offset_B = { 0 },
+ ._offset_B = { 0 },
.sample_size_B = 8,
.nr_samples = 4,
.tile_size = { 32, 16 },
PIPE_FORMAT_R32G32_SINT,
},
{
- .offset_B = { 0, 4, 12, 16 },
+ ._offset_B = { 0, 4, 12, 16 },
.sample_size_B = 24,
.nr_samples = 1,
.tile_size = { 32, 32 },
PIPE_FORMAT_R32G32_SINT,
},
{
- .offset_B = { 0, 4, 12, 16 },
+ ._offset_B = { 0, 4, 12, 16 },
.sample_size_B = 24,
.nr_samples = 2,
.tile_size = { 32, 16 },
PIPE_FORMAT_R32G32_SINT,
},
{
- .offset_B = { 0, 4, 12, 16 },
+ ._offset_B = { 0, 4, 12, 16 },
.sample_size_B = 24,
.nr_samples = 4,
.tile_size = { 16, 16 },
1,
{ PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R16G16_SNORM },
{
- .offset_B = { 0, 2 },
+ ._offset_B = { 0, 2 },
.sample_size_B = 8,
.nr_samples = 1,
.tile_size = { 32, 32 },
1,
{ PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R10G10B10A2_UNORM },
{
- .offset_B = { 0, 4 },
+ ._offset_B = { 0, 4 },
.sample_size_B = 8,
.nr_samples = 1,
.tile_size = { 32, 32 },