Flag day change to replace the previous hardcoded background/end-of-tile shaders
and the API-style load/store_output in fragment shaders with the generated
shaders and lowered *_agx intrinsics. This gets us working non-UNORM8 render
targets and working MRT. It's also a step toward working MSAA, but that needs a
lot more work, since the multisampling programming model on AGX is quite
different from any of the APIs (including Metal).
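In short, the new driver-side flow (a sketch assembled from the hunks below;
see agx_nir_lower_tilebuffer and agx_build_meta):

   /* Per fragment shader: lower API-style colour output I/O to explicit
    * tilebuffer access, keyed on the bound render target formats. */
   struct agx_tilebuffer_layout tib =
      agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1);
   agx_nir_lower_tilebuffer(nir, &tib);

   /* Per batch: generated background, partial-render background and
    * end-of-tile store programs replace the old hardcoded shaders. */
   uint64_t pipeline_background         = agx_build_meta(batch, false, false);
   uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
   uint64_t pipeline_store              = agx_build_meta(batch, true, false);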
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19871>
}
static agx_instr *
-agx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr)
+agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr)
{
- nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
- unsigned loc = sem.location;
- assert(sem.dual_source_blend_index == 0 && "todo: dual-source blending");
- assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
- unsigned rt = (loc - FRAG_RESULT_DATA0);
-
/* TODO: Reverse-engineer interactions with MRT */
if (b->shader->key->fs.ignore_tib_dependencies) {
assert(b->shader->nir->info.internal && "only for clear shaders");
b->shader->did_writeout = true;
return agx_st_tile(b, agx_src_index(&instr->src[0]),
- b->shader->key->fs.tib_formats[rt],
- nir_intrinsic_write_mask(instr));
+ agx_src_index(&instr->src[1]),
+ agx_format_for_pipe(nir_intrinsic_format(instr)),
+ nir_intrinsic_write_mask(instr),
+ nir_intrinsic_base(instr));
}
static void
-agx_emit_load_tile(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
+agx_emit_local_load_pixel(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
{
- nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
- unsigned loc = sem.location;
- assert(sem.dual_source_blend_index == 0 && "dual src ld_tile is nonsense");
- assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
- unsigned rt = (loc - FRAG_RESULT_DATA0);
-
/* TODO: Reverse-engineer interactions with MRT */
assert(!b->shader->key->fs.ignore_tib_dependencies && "invalid usage");
agx_writeout(b, 0x0008);
b->shader->out->reads_tib = true;
unsigned nr_comps = nir_dest_num_components(instr->dest);
- agx_ld_tile_to(b, dest, b->shader->key->fs.tib_formats[rt],
- BITFIELD_MASK(nr_comps));
+ agx_ld_tile_to(b, dest, agx_src_index(&instr->src[0]),
+ agx_format_for_pipe(nir_intrinsic_format(instr)),
+ BITFIELD_MASK(nr_comps),
+ nir_intrinsic_base(instr));
agx_emit_cached_split(b, dest, nr_comps);
}
return NULL;
case nir_intrinsic_store_output:
- if (stage == MESA_SHADER_FRAGMENT)
- return agx_emit_fragment_out(b, instr);
- else if (stage == MESA_SHADER_VERTEX)
- return agx_emit_store_vary(b, instr);
- else
- unreachable("Unsupported shader stage");
+ assert(stage == MESA_SHADER_VERTEX);
+ return agx_emit_store_vary(b, instr);
+
+ case nir_intrinsic_store_local_pixel_agx:
+ assert(stage == MESA_SHADER_FRAGMENT);
+ return agx_emit_local_store_pixel(b, instr);
- case nir_intrinsic_load_output:
+ case nir_intrinsic_load_local_pixel_agx:
assert(stage == MESA_SHADER_FRAGMENT);
- agx_emit_load_tile(b, dst, instr);
+ agx_emit_local_load_pixel(b, dst, instr);
return NULL;
case nir_intrinsic_load_ubo:
};
struct agx_fs_shader_key {
- enum agx_format tib_formats[AGX_MAX_RTS];
-
/* Normally, access to the tilebuffer must be guarded by appropriate fencing
* instructions to ensure correct results in the presence of out-of-order
* hardware optimizations. However, specially dispatched clear shaders are
uint32_t component;
uint32_t channels;
uint32_t bfi_mask;
+ uint16_t pixel_offset;
enum agx_sr sr;
enum agx_icond icond;
enum agx_fcond fcond;
- enum agx_format format;
enum agx_round round;
enum agx_lod_mode lod_mode;
struct agx_block *target;
};
+ /* For local access */
+ enum agx_format format;
+
/* For load varying */
bool perspective : 1;
MASK = immediate("mask")
BFI_MASK = immediate("bfi_mask")
LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
+PIXEL_OFFSET = immediate("pixel_offset")
DIM = enum("dim", {
0: '1d',
op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 1, can_eliminate = False)
-# Essentially same encoding
-op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 0, imms = [FORMAT, MASK], can_reorder = False)
+# Essentially same encoding. Last source is the sample mask
+op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 1,
+ imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False)
-op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 1,
- can_eliminate = False, imms = [FORMAT, MASK])
+op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 2,
+ can_eliminate = False, imms = [FORMAT, MASK, PIXEL_OFFSET])
for (name, exact) in [("any", 0xC000), ("none", 0xC200)]:
op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0,
/* cmpselsrc takes integer immediates only */
if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;
+ if (I->op == AGX_OPCODE_ST_TILE && s == 0) continue;
if (float_src) {
bool fp16 = (def->dest[0].size == AGX_SIZE_16);
agx_optimizer_fmov(defs, I);
/* Inline immediates if we can. TODO: systematic */
- if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
- I->op != AGX_OPCODE_COLLECT && I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
- I->op != AGX_OPCODE_TEXTURE_LOAD && I->op != AGX_OPCODE_UNIFORM_STORE &&
+ if (I->op != AGX_OPCODE_ST_VARY &&
+ I->op != AGX_OPCODE_COLLECT &&
+ I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
+ I->op != AGX_OPCODE_TEXTURE_LOAD &&
+ I->op != AGX_OPCODE_UNIFORM_STORE &&
I->op != AGX_OPCODE_BLOCK_IMAGE_STORE)
agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
}
{
bool load = (I->op == AGX_OPCODE_LD_TILE);
unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);
- unsigned rt = 0; /* TODO */
assert(I->mask < 0x10);
+ assert(I->pixel_offset < 0x200);
+
+ agx_index sample_index = load ? I->src[0] : I->src[1];
+ assert(sample_index.type == AGX_INDEX_REGISTER ||
+ sample_index.type == AGX_INDEX_IMMEDIATE);
+ assert(sample_index.size == AGX_SIZE_16);
+ unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
+ unsigned S = sample_index.value;
+ assert(S < 0x100);
uint64_t raw =
- 0x09 |
- (load ? (1 << 6) : 0) |
+ agx_opcodes_info[I->op].encoding.exact |
((uint64_t) (D & BITFIELD_MASK(8)) << 7) |
+ (St << 22) |
((uint64_t) (I->format) << 24) |
- ((uint64_t) (rt) << 32) |
+ ((uint64_t) (I->pixel_offset & BITFIELD_MASK(7)) << 28) |
(load ? (1ull << 35) : 0) |
((uint64_t) (I->mask) << 36) |
- ((uint64_t) 0x0380FC << 40) |
+ ((uint64_t) (I->pixel_offset >> 7) << 40) |
+ ((uint64_t) (S & BITFIELD_MASK(6)) << 42) |
+ ((uint64_t) (S >> 6) << 56) |
(((uint64_t) (D >> 8)) << 60);
unsigned size = 8;
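(For reference, a stand-alone sketch of the split-immediate packing above;
pack_tile_offset_sample_example is a hypothetical helper, not part of this
patch.)

   /* Derived from the shifts in this hunk, not an ISA reference:
    * pixel_offset is 9 bits -> bits [6:0] land at bit 28, bits [8:7] at bit 40;
    * the sample source S is 8 bits -> bits [5:0] at bit 42, bits [7:6] at bit 56.
    */
   static inline uint64_t
   pack_tile_offset_sample_example(uint16_t pixel_offset, uint8_t S)
   {
      assert(pixel_offset < 0x200);
      assert(S < 0x100);

      return ((uint64_t)(pixel_offset & 0x7F) << 28) |
             ((uint64_t)(pixel_offset >> 7) << 40) |
             ((uint64_t)(S & 0x3F) << 42) |
             ((uint64_t)(S >> 6) << 56);
   }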
pthread_mutex_t bo_map_lock;
struct util_sparse_array bo_map;
-
- /* Fixed shaders */
- struct {
- struct agx_bo *bo;
- uint32_t clear;
- uint32_t store;
- } internal;
-
- struct {
- struct agx_bo *bo;
- uint32_t format[AGX_NUM_FORMATS];
- } reload;
};
bool
-/*
+/*
* Copyright (C) 2021 Alyssa Rosenzweig
* Copyright (C) 2020-2021 Collabora, Ltd.
* Copyright (C) 2014 Broadcom
#include "asahi/compiler/agx_compile.h"
#include "gallium/auxiliary/util/u_blitter.h"
-static void
-agx_build_reload_shader(struct agx_device *dev)
-{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
- &agx_nir_options, "agx_reload");
-
- nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out,
- glsl_vector_type(GLSL_TYPE_FLOAT, 4), "output");
- out->data.location = FRAG_RESULT_DATA0;
-
- nir_ssa_def *fragcoord = nir_load_frag_coord(&b);
- nir_ssa_def *coord = nir_channels(&b, fragcoord, 0x3);
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
- tex->dest_type = nir_type_float32;
- tex->sampler_dim = GLSL_SAMPLER_DIM_RECT;
- tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(coord);
- tex->coord_components = 2;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
- nir_builder_instr_insert(&b, &tex->instr);
- nir_store_var(&b, out, &tex->dest.ssa, 0xFF);
-
- unsigned offset = 0;
- unsigned bo_size = 4096;
-
- struct agx_bo *bo = agx_bo_create(dev, bo_size, AGX_MEMORY_TYPE_SHADER);
- dev->reload.bo = bo;
-
- for (unsigned i = 0; i < AGX_NUM_FORMATS; ++i) {
- struct util_dynarray binary;
- util_dynarray_init(&binary, NULL);
-
- nir_shader *s = nir_shader_clone(NULL, b.shader);
- struct agx_shader_info info;
-
- struct agx_shader_key key = {
- .fs.tib_formats[0] = i,
- .fs.ignore_tib_dependencies = true,
- };
-
- agx_preprocess_nir(s);
- agx_compile_shader_nir(s, &key, NULL, &binary, &info);
-
- assert(offset + binary.size < bo_size);
- memcpy(((uint8_t *) bo->ptr.cpu) + offset, binary.data, binary.size);
-
- dev->reload.format[i] = bo->ptr.gpu + offset;
- offset += ALIGN_POT(binary.size, 128);
-
- util_dynarray_fini(&binary);
- }
-}
-
void
agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
bool render_cond)
agx_blitter_save(ctx, ctx->blitter, info->render_condition_enable);
util_blitter_blit(ctx->blitter, info);
}
-
-/* We need some fixed shaders for common rendering tasks. When colour buffer
- * reload is not in use, a shader is used to clear a particular colour. At the
- * end of rendering a tile, a shader is used to write it out. These shaders are
- * too trivial to go through the compiler at this stage. */
-#define AGX_STOP \
- 0x88, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, \
- 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00 \
-
-#define AGX_BLEND \
- 0x09, 0x00, 0x00, 0x04, 0xf0, 0xfc, 0x80, 0x03
-
-/* Clears the tilebuffer, where u6-u7 are preloaded with the FP16 clear colour
-
- 0: 7e018c098040 bitop_mov r0, u6
- 6: 7e058e098000 bitop_mov r1, u7
- c: 09000004f0fc8003 TODO.blend
- */
-
-static uint8_t shader_clear[] = {
- 0x7e, 0x01, 0x8c, 0x09, 0x80, 0x40,
- 0x7e, 0x05, 0x8e, 0x09, 0x80, 0x00,
- AGX_BLEND,
- AGX_STOP
-};
-
-static uint8_t shader_store[] = {
- 0x7e, 0x00, 0x04, 0x09, 0x80, 0x00,
- 0xb1, 0x80, 0x00, 0x80, 0x00, 0x4a, 0x00, 0x00, 0x0a, 0x00,
- AGX_STOP
-};
-
-void
-agx_internal_shaders(struct agx_device *dev)
-{
- unsigned clear_offset = 0;
- unsigned store_offset = 1024;
-
- struct agx_bo *bo = agx_bo_create(dev, 4096, AGX_MEMORY_TYPE_SHADER);
- memcpy(((uint8_t *) bo->ptr.cpu) + clear_offset, shader_clear, sizeof(shader_clear));
- memcpy(((uint8_t *) bo->ptr.cpu) + store_offset, shader_store, sizeof(shader_store));
-
- dev->internal.bo = bo;
- dev->internal.clear = bo->ptr.gpu + clear_offset;
- dev->internal.store = bo->ptr.gpu + store_offset;
-
- agx_build_reload_shader(dev);
-}
assert(scissor_state == NULL && "we don't support PIPE_CAP_CLEAR_SCISSORED");
/* Fast clears configure the batch */
- if (fastclear & PIPE_CLEAR_COLOR0)
- memcpy(batch->clear_color, color->f, sizeof(color->f));
+ for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+ if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))
+ continue;
+
+ static_assert(sizeof(color->f) == 16, "mismatched structure");
+
+ batch->uploaded_clear_color[rt] =
+ agx_pool_upload_aligned(&batch->pool, color->f, sizeof(color->f), 16);
+ }
if (fastclear & PIPE_CLEAR_DEPTH)
batch->clear_depth = depth;
uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };
memcpy(batch->encoder_current, stop, sizeof(stop));
- /* Emit the commandbuffer */
- uint64_t pipeline_clear = 0, pipeline_reload = 0;
- bool clear_pipeline_textures = false;
-
- uint16_t clear_colour[4] = {
- _mesa_float_to_half(batch->clear_color[0]),
- _mesa_float_to_half(batch->clear_color[1]),
- _mesa_float_to_half(batch->clear_color[2]),
- _mesa_float_to_half(batch->clear_color[3])
- };
-
- pipeline_clear = agx_build_clear_pipeline(batch,
- dev->internal.clear,
- agx_pool_upload(&batch->pool, clear_colour, sizeof(clear_colour)));
-
- if (batch->key.cbufs[0]) {
- enum agx_format internal = AGX_FORMAT_U8NORM /* other formats broken */;
- uint32_t shader = dev->reload.format[internal];
-
- pipeline_reload = agx_build_reload_pipeline(batch, shader,
- batch->key.cbufs[0]);
- }
-
- if (batch->key.cbufs[0] && !(batch->clear & PIPE_CLEAR_COLOR0)) {
- clear_pipeline_textures = true;
- pipeline_clear = pipeline_reload;
- }
+ uint64_t pipeline_background = agx_build_meta(batch, false, false);
+ uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
+ uint64_t pipeline_store = agx_build_meta(batch, true, false);
- uint64_t pipeline_store = 0;
-
- if (batch->key.cbufs[0]) {
- pipeline_store =
- agx_build_store_pipeline(batch,
- dev->internal.store,
- agx_batch_upload_pbe(batch, 0));
- }
+ bool clear_pipeline_textures = false;
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
struct pipe_surface *surf = batch->key.cbufs[i];
if (surf && surf->texture) {
struct agx_resource *rt = agx_resource(surf->texture);
BITSET_SET(rt->data_valid, surf->u.tex.level);
+
+ if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
+ clear_pipeline_textures = true;
}
}
agx_batch_add_bo(batch, batch->encoder);
agx_batch_add_bo(batch, batch->scissor.bo);
agx_batch_add_bo(batch, batch->depth_bias.bo);
- agx_batch_add_bo(batch, dev->internal.bo);
- agx_batch_add_bo(batch, dev->reload.bo);
unsigned handle_count =
agx_batch_num_bo(batch) +
encoder_id,
batch->scissor.bo->ptr.gpu,
batch->depth_bias.bo->ptr.gpu,
- pipeline_clear,
- pipeline_reload,
+ pipeline_background,
+ pipeline_background_partial,
pipeline_store,
clear_pipeline_textures,
batch->clear,
free(handles);
- agx_submit_cmdbuf(dev, dev->cmdbuf.handle, dev->memmap.handle, dev->queue.id);
-
agx_wait_queue(dev->queue);
if (dev->debug & AGX_DBG_TRACE) {
pctx->invalidate_resource = agx_invalidate_resource;
agx_init_state_functions(pctx);
+ agx_meta_init(&ctx->meta, agx_device(screen), ctx);
ctx->blitter = util_blitter_create(pctx);
U_TRANSFER_HELPER_MSAA_MAP |
U_TRANSFER_HELPER_Z24_IN_Z32F);
- agx_internal_shaders(&agx_screen->dev);
-
return screen;
}
}
agx_preprocess_nir(nir);
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ struct agx_tilebuffer_layout tib =
+ agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1);
+
+ agx_nir_lower_tilebuffer(nir, &tib);
+ }
+
agx_compile_shader_nir(nir, &key->base, debug, &binary, &compiled->info);
if (binary.size) {
}
case MESA_SHADER_FRAGMENT:
key.nr_cbufs = 1;
- key.base.fs.tib_formats[0] = AGX_FORMAT_U8NORM;
+ key.rt_formats[0] = PIPE_FORMAT_R8G8B8A8_UNORM;
break;
default:
unreachable("Unknown shader stage in shader-db precompile");
for (unsigned i = 0; i < key.nr_cbufs; ++i) {
struct pipe_surface *surf = batch->key.cbufs[i];
- if (surf) {
- enum pipe_format fmt = surf->format;
- key.rt_formats[i] = fmt;
- key.base.fs.tib_formats[i] = AGX_FORMAT_U8NORM /* other formats broken */;
- } else {
- key.rt_formats[i] = PIPE_FORMAT_NONE;
- }
+ key.rt_formats[i] = surf ? surf->format : PIPE_FORMAT_NONE;
}
memcpy(&key.blend, ctx->blend, sizeof(key.blend));
return agx_usc_fini(&b);
}
-/* Internal pipelines (TODO: refactor?) */
uint64_t
-agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf)
+agx_build_meta(struct agx_batch *batch, bool store, bool partial_render)
{
- struct agx_usc_builder b =
- agx_alloc_usc_control(&batch->pipeline_pool, 1);
+ struct agx_context *ctx = batch->ctx;
- agx_usc_pack(&b, UNIFORM, cfg) {
- cfg.start_halfs = (6 * 2);
- cfg.size_halfs = 4;
- cfg.buffer = clear_buf;
- }
+ /* Construct the key */
+ struct agx_meta_key key = {
+ .tib = batch->tilebuffer_layout
+ };
- agx_usc_pack(&b, SHARED, cfg) {
- cfg.uses_shared_memory = true;
- cfg.layout = AGX_SHARED_LAYOUT_32X32;
- cfg.sample_stride_in_8_bytes = 1;
- cfg.bytes_per_threadgroup = 32 * 256;
- }
+ for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+ struct pipe_surface *surf = batch->key.cbufs[rt];
- agx_usc_pack(&b, SHADER, cfg) {
- cfg.code = code;
- cfg.unk_2 = 3;
+ if (surf == NULL)
+ continue;
+
+ if (store) {
+ /* TODO: Suppress stores to discarded render targets */
+ key.op[rt] = AGX_META_OP_STORE;
+ } else {
+ bool load = !(batch->clear & (PIPE_CLEAR_COLOR0 << rt));
+
+ /* The background program used for partial renders must always load
+ * whatever was stored in the mid-frame end-of-tile program.
+ */
+ load |= partial_render;
+
+ key.op[rt] = load ? AGX_META_OP_LOAD : AGX_META_OP_CLEAR;
+ }
}
- agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 8;
- agx_usc_pack(&b, NO_PRESHADER, cfg);
+ /* Get the shader */
+ struct agx_meta_shader *shader = agx_get_meta_shader(&ctx->meta, &key);
+ agx_batch_add_bo(batch, shader->bo);
- return agx_usc_fini(&b);
-}
+ /* Begin building the pipeline */
+ struct agx_usc_builder b =
+ agx_alloc_usc_control(&batch->pipeline_pool, 1 + PIPE_MAX_COLOR_BUFS);
+
+ for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
+ if (key.op[rt] == AGX_META_OP_LOAD) {
+ /* Each reloaded render target is textured */
+ struct agx_ptr texture = agx_pool_alloc_aligned(&batch->pool, AGX_TEXTURE_LENGTH, 64);
+ struct pipe_surface *surf = batch->key.cbufs[rt];
+ assert(surf != NULL && "cannot load nonexistent attachment");
+
+ struct agx_resource *rsrc = agx_resource(surf->texture);
+
+ agx_pack_texture(texture.cpu, rsrc, surf->format, &(struct pipe_sampler_view) {
+ /* To reduce shader variants, we always use a 2D texture. For
+ * reloads of arrays and cube maps, we map a single layer as a 2D
+ * image.
+ */
+ .target = PIPE_TEXTURE_2D,
+ .swizzle_r = PIPE_SWIZZLE_X,
+ .swizzle_g = PIPE_SWIZZLE_Y,
+ .swizzle_b = PIPE_SWIZZLE_Z,
+ .swizzle_a = PIPE_SWIZZLE_W,
+ .u.tex = {
+ .first_layer = surf->u.tex.first_layer,
+ .last_layer = surf->u.tex.last_layer,
+ .first_level = surf->u.tex.level,
+ .last_level = surf->u.tex.level
+ }
+ });
+
+ agx_usc_pack(&b, TEXTURE, cfg) {
+ cfg.start = rt;
+ cfg.count = 1;
+ cfg.buffer = texture.gpu;
+ }
+ } else if (key.op[rt] == AGX_META_OP_CLEAR) {
+ assert(batch->uploaded_clear_color[rt] && "set when cleared");
+ agx_usc_uniform(&b, 8 * rt, 8, batch->uploaded_clear_color[rt]);
+ } else if (key.op[rt] == AGX_META_OP_STORE) {
+ agx_usc_pack(&b, TEXTURE, cfg) {
+ cfg.start = rt;
+ cfg.count = 1;
+ cfg.buffer = agx_batch_upload_pbe(batch, rt);
+ }
+ }
+ }
-uint64_t
-agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf)
-{
+ /* All render targets share a sampler */
struct agx_ptr sampler = agx_pool_alloc_aligned(&batch->pool, AGX_SAMPLER_LENGTH, 64);
- struct agx_ptr texture = agx_pool_alloc_aligned(&batch->pool, AGX_TEXTURE_LENGTH, 64);
agx_pack(sampler.cpu, SAMPLER, cfg) {
cfg.magnify_linear = true;
cfg.unk_3 = 0;
}
- agx_pack(texture.cpu, TEXTURE, cfg) {
- struct agx_resource *rsrc = agx_resource(surf->texture);
- unsigned layer = surf->u.tex.first_layer;
- const struct util_format_description *desc =
- util_format_description(surf->format);
-
- /* To reduce shader variants, we always use a 2D texture. For reloads of
- * arrays and cube maps, we map a single layer as a 2D image.
- */
- cfg.dimension = AGX_TEXTURE_DIMENSION_2D;
- cfg.layout = agx_translate_layout(rsrc->layout.tiling);
- cfg.channels = agx_pixel_format[surf->format].channels;
- cfg.type = agx_pixel_format[surf->format].type;
- cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]);
- cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]);
- cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]);
- cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]);
- cfg.width = surf->width;
- cfg.height = surf->height;
- cfg.first_level = surf->u.tex.level;
- cfg.last_level = surf->u.tex.level;
- cfg.unk_mipmapped = rsrc->mipmapped;
- cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
- cfg.address = agx_map_texture_gpu(rsrc, layer);
-
- if (rsrc->layout.tiling == AIL_TILING_LINEAR)
- cfg.stride = ail_get_linear_stride_B(&rsrc->layout, surf->u.tex.level) - 16;
- else
- cfg.unk_tiled = true;
- }
-
- struct agx_usc_builder b =
- agx_alloc_usc_control(&batch->pipeline_pool, 2);
-
- agx_usc_pack(&b, TEXTURE, cfg) {
- cfg.start = 0;
- cfg.count = 1;
- cfg.buffer = texture.gpu;
- }
-
agx_usc_pack(&b, SAMPLER, cfg) {
cfg.start = 0;
cfg.count = 1;
cfg.buffer = sampler.gpu;
}
- agx_usc_pack(&b, SHARED, cfg) {
- cfg.uses_shared_memory = true;
- cfg.layout = AGX_SHARED_LAYOUT_32X32;
- cfg.sample_stride_in_8_bytes = 1;
- cfg.sample_count = 1;
- cfg.bytes_per_threadgroup = 8 * 32 * 32;
- }
+ agx_usc_tilebuffer(&b, &batch->tilebuffer_layout);
agx_usc_pack(&b, SHADER, cfg) {
- cfg.code = code;
- cfg.unk_2 = 3;
+ cfg.code = shader->ptr;
+ cfg.unk_2 = 0;
}
agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 256;
return agx_usc_fini(&b);
}
-uint64_t
-agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
- uint64_t render_target)
-{
- struct agx_usc_builder b = agx_alloc_usc_control(&batch->pipeline_pool, 2);
-
- agx_usc_pack(&b, TEXTURE, cfg) {
- cfg.start = 0;
- cfg.count = 1;
- cfg.buffer = render_target;
- }
-
- uint32_t unk[] = { 0, ~0 };
-
- agx_usc_pack(&b, UNIFORM, cfg) {
- cfg.start_halfs = 4;
- cfg.size_halfs = 4;
- cfg.buffer = agx_pool_upload_aligned(&batch->pool, unk, sizeof(unk), 16);
- }
-
- agx_usc_pack(&b, SHARED, cfg) {
- cfg.uses_shared_memory = true;
- cfg.layout = AGX_SHARED_LAYOUT_32X32;
- cfg.sample_stride_in_8_bytes = 1;
- cfg.bytes_per_threadgroup = 32 * 256;
- }
-
- agx_usc_pack(&b, SHADER, cfg) cfg.code = code;
- agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = 8;
- agx_usc_pack(&b, NO_PRESHADER, cfg);
-
- return agx_usc_fini(&b);
-}
-
void
agx_batch_init_state(struct agx_batch *batch)
{
#include "compiler/nir/nir_lower_blend.h"
#include "util/hash_table.h"
#include "util/bitset.h"
+#include "agx_meta.h"
struct agx_streamout_target {
struct pipe_stream_output_target base;
/* Base of uploaded texture descriptors */
uint64_t textures;
- float clear_color[4];
+ uint64_t uploaded_clear_color[PIPE_MAX_COLOR_BUFS];
double clear_depth;
unsigned clear_stencil;
/* Map of agx_resource to agx_batch that writes that resource */
struct hash_table *writer;
+
+ struct agx_meta_cache meta;
};
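/* Usage sketch, assembled from the agx_clear and agx_build_meta hunks above:
 * the clear colour is now uploaded per render target and later bound as
 * uniforms for the generated clear program (illustrative excerpt only). */

   /* In agx_clear(), for each fast-cleared RT: */
   batch->uploaded_clear_color[rt] =
      agx_pool_upload_aligned(&batch->pool, color->f, sizeof(color->f), 16);

   /* In agx_build_meta(), when key.op[rt] == AGX_META_OP_CLEAR: */
   agx_usc_uniform(&b, 8 * rt, 8, batch->uploaded_clear_color[rt]);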
static inline struct agx_context *
agx_batch_is_active(struct agx_batch *batch);
uint64_t
-agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf);
-
-uint64_t
-agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
- uint64_t render_target);
-
-uint64_t
-agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf);
-
-uint64_t
agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);
/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
void agx_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info);
-void agx_internal_shaders(struct agx_device *dev);
-
/* Batch logic */
void
agx_batch_init_state(struct agx_batch *batch);
+uint64_t
+agx_build_meta(struct agx_batch *batch, bool store, bool partial_render);
+
#endif