From 101700b150aecebfc4cc2cb18a467ca939530b04 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 31 Dec 2022 10:36:53 -0800 Subject: [PATCH] freedreno/a6xx: Pre-bake IBO descriptor sets Pre-bake IBO descriptor sets at the time that images/SSBOs are bound, and re-use the pre-baked descriptors at draw time when we emit state. This starts putting in place the state tracking we'll use when switching over to bindless IBO state, without yet changing the shaders (lowering to bindless) or changing the actual state emitted (other than switching to use the storage descriptor for image reads via isam, like tu does). Note that this even pre-bakes the iova into the descriptor, rather than relying on OUT_RELOC() to do the bo tracking, so we need to manually attach the bo to the ring. But we already require FD_BO_NO_HARDPIN for a6xx. This makes the state emit a straight memcpy, and will simplify things when it comes to generating the bindless descriptor set (which due to the desc_size field in the low bits of the BINDLESS_BASE regs would be awkward to construct as a ring rather than a bo). Signed-off-by: Rob Clark Part-of: --- src/gallium/drivers/freedreno/a6xx/fd6_context.h | 27 ++++ src/gallium/drivers/freedreno/a6xx/fd6_image.c | 187 +++++++++++++++++------ src/gallium/drivers/freedreno/freedreno_state.c | 2 +- src/gallium/drivers/freedreno/freedreno_state.h | 8 +- 4 files changed, 172 insertions(+), 52 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 5745d7e..37a1051 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -34,6 +34,7 @@ #include "freedreno_resource.h" #include "ir3/ir3_shader.h" +#include "ir3/ir3_descriptor.h" #include "a6xx.xml.h" @@ -47,6 +48,22 @@ struct fd6_lrz_state { enum a6xx_ztest_mode z_mode : 2; }; +/** + * Bindless descriptor set state for a single descriptor set. 
+ */ +struct fd6_descriptor_set { + /** + * Pre-baked descriptor state, updated when image/SSBO is bound + */ + uint32_t descriptor[IR3_BINDLESS_DESC_COUNT][FDL6_TEX_CONST_DWORDS]; + + /** + * The current seqno of the baked-in resource, for detecting if the + * resource has been rebound + */ + uint16_t seqno[IR3_BINDLESS_DESC_COUNT]; +}; + struct fd6_context { struct fd_context base; @@ -91,6 +108,16 @@ struct fd6_context { uint16_t tex_seqno; struct hash_table *tex_cache; + /** + * Descriptor sets for 3d shader stages + */ + struct fd6_descriptor_set descriptor_sets[5] dt; + + /** + * Descriptor set for compute shaders + */ + struct fd6_descriptor_set cs_descriptor_set dt; + struct { /* previous lrz state, which is a function of multiple gallium * stateobjs, but doesn't necessarily change as frequently: diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index dc50b08..e13a7f8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -33,6 +33,7 @@ #include "freedreno_state.h" #include "fd6_image.h" +#include "fd6_pack.h" #include "fd6_resource.h" #include "fd6_screen.h" #include "fd6_texture.h" @@ -45,25 +46,18 @@ fd6_emit_single_plane_descriptor(struct fd_ringbuffer *ring, struct pipe_resource *prsc, uint32_t *descriptor) { - /* If the resource isn't present (holes are allowed), zero-fill the slot. 
*/ - if (!prsc) { - for (int i = 0; i < 16; i++) - OUT_RING(ring, 0); - return; - } - - struct fd_resource *rsc = fd_resource(prsc); - for (int i = 0; i < 4; i++) + for (int i = 0; i < FDL6_TEX_CONST_DWORDS; i++) OUT_RING(ring, descriptor[i]); + if (prsc) + fd_ringbuffer_attach_bo(ring, fd_resource(prsc)->bo); +} - OUT_RELOC(ring, rsc->bo, descriptor[4], (uint64_t)descriptor[5] << 32, 0); - - OUT_RING(ring, descriptor[6]); - - OUT_RELOC(ring, rsc->bo, descriptor[7], (uint64_t)descriptor[8] << 32, 0); - - for (int i = 9; i < FDL6_TEX_CONST_DWORDS; i++) - OUT_RING(ring, descriptor[i]); +static uint64_t +rsc_iova(struct pipe_resource *prsc, unsigned offset) +{ + if (!prsc) + return 0; + return fd_bo_get_iova(fd_resource(prsc)->bo) + offset; } static void @@ -74,35 +68,24 @@ fd6_ssbo_descriptor(struct fd_context *ctx, descriptor, ctx->screen->info->a6xx.storage_16bit ? PIPE_FORMAT_R16_UINT : PIPE_FORMAT_R32_UINT, - swiz_identity, buf->buffer_offset, /* Using relocs for addresses */ + swiz_identity, rsc_iova(buf->buffer, buf->buffer_offset), buf->buffer_size); } static void -fd6_emit_image_descriptor(struct fd_context *ctx, struct fd_ringbuffer *ring, - const struct pipe_image_view *buf, bool ibo) +fd6_image_descriptor(struct fd_context *ctx, const struct pipe_image_view *buf, + uint32_t *descriptor) { - struct fd_resource *rsc = fd_resource(buf->resource); - if (!rsc) { - for (int i = 0; i < FDL6_TEX_CONST_DWORDS; i++) - OUT_RING(ring, 0); - return; - } - if (buf->resource->target == PIPE_BUFFER) { - uint32_t descriptor[FDL6_TEX_CONST_DWORDS]; - uint32_t size = fd_clamp_buffer_size(buf->format, buf->u.buf.size, A4XX_MAX_TEXEL_BUFFER_ELEMENTS_UINT); fdl6_buffer_view_init(descriptor, buf->format, swiz_identity, - buf->u.buf.offset, /* Using relocs for addresses */ - size); - fd6_emit_single_plane_descriptor(ring, buf->resource, descriptor); + rsc_iova(buf->resource, buf->u.buf.offset), + size); } else { struct fdl_view_args args = { - /* Using relocs for addresses */ - 
.iova = 0, + .iova = rsc_iova(buf->resource, 0), .base_miplevel = buf->u.tex.level, .level_count = 1, @@ -127,13 +110,12 @@ fd6_emit_image_descriptor(struct fd_context *ctx, struct fd_ringbuffer *ring, args.type = FDL_VIEW_TYPE_2D; struct fdl6_view view; - const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL}; + struct fd_resource *rsc = fd_resource(buf->resource); + const struct fdl_layout *layouts[3] = { &rsc->layout, NULL, NULL }; fdl6_view_init(&view, layouts, &args, ctx->screen->info->a6xx.has_z24uint_s8uint); - if (ibo) - fd6_emit_single_plane_descriptor(ring, buf->resource, view.storage_descriptor); - else - fd6_emit_single_plane_descriptor(ring, buf->resource, view.descriptor); + + memcpy(descriptor, view.storage_descriptor, sizeof(view.storage_descriptor)); } } @@ -141,7 +123,13 @@ void fd6_emit_image_tex(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct pipe_image_view *pimg) { - fd6_emit_image_descriptor(ctx, ring, pimg, false); + uint32_t descriptor[FDL6_TEX_CONST_DWORDS]; + if (!pimg->resource) { + memset(descriptor, 0, sizeof(descriptor)); + } else { + fd6_image_descriptor(ctx, pimg, descriptor); + } + fd6_emit_single_plane_descriptor(ring, pimg->resource, descriptor); } void @@ -153,6 +141,52 @@ fd6_emit_ssbo_tex(struct fd_context *ctx, struct fd_ringbuffer *ring, fd6_emit_single_plane_descriptor(ring, pbuf->buffer, descriptor); } +static struct fd6_descriptor_set * +descriptor_set(struct fd_context *ctx, enum pipe_shader_type shader) + assert_dt +{ + struct fd6_context *fd6_ctx = fd6_context(ctx); + + if (shader == PIPE_SHADER_COMPUTE) + return &fd6_ctx->cs_descriptor_set; + + unsigned idx = ir3_shader_descriptor_set(shader); + assert(idx < ARRAY_SIZE(fd6_ctx->descriptor_sets)); + return &fd6_ctx->descriptor_sets[idx]; +} + +static void +clear_descriptor(struct fd6_descriptor_set *set, unsigned slot) +{ + memset(set->descriptor[slot], 0, sizeof(set->descriptor[slot])); +} + +static void +validate_image_descriptor(struct 
fd_context *ctx, struct fd6_descriptor_set *set, + unsigned slot, struct pipe_image_view *img) +{ + struct fd_resource *rsc = fd_resource(img->resource); + + if (!rsc || (rsc->seqno == set->seqno[slot])) + return; + + fd6_image_descriptor(ctx, img, set->descriptor[slot]); + set->seqno[slot] = rsc->seqno; +} + +static void +validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set, + unsigned slot, struct pipe_shader_buffer *buf) +{ + struct fd_resource *rsc = fd_resource(buf->buffer); + + if (!rsc || (rsc->seqno == set->seqno[slot])) + return; + + fd6_ssbo_descriptor(ctx, buf, set->descriptor[slot]); + set->seqno[slot] = rsc->seqno; +} + /* Build combined image/SSBO "IBO" state, returns ownership of state reference */ struct fd_ringbuffer * fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, @@ -160,48 +194,90 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, { struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader]; struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader]; + struct fd6_descriptor_set *set = descriptor_set(ctx, shader); struct fd_ringbuffer *state = fd_submit_new_ringbuffer( ctx->batch->submit, - ir3_shader_nibo(v) * 16 * 4, + ir3_shader_nibo(v) * FDL6_TEX_CONST_DWORDS * 4, FD_RINGBUFFER_STREAMING); assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); - uint32_t descriptor[FDL6_TEX_CONST_DWORDS]; for (unsigned i = 0; i < v->num_ssbos; i++) { - fd6_ssbo_descriptor(ctx, &bufso->sb[i], descriptor); - fd6_emit_single_plane_descriptor(state, bufso->sb[i].buffer, descriptor); + unsigned slot = i + IR3_BINDLESS_SSBO_OFFSET; + validate_buffer_descriptor(ctx, set, slot, &bufso->sb[i]); + fd6_emit_single_plane_descriptor(state, bufso->sb[i].buffer, + set->descriptor[slot]); } for (unsigned i = v->num_ssbos; i < v->num_ibos; i++) { - fd6_emit_image_descriptor(ctx, state, &imgso->si[i - v->num_ssbos], true); + unsigned n = i - v->num_ssbos; + 
unsigned slot = n + IR3_BINDLESS_IMAGE_OFFSET; + validate_image_descriptor(ctx, set, slot, &imgso->si[n]); + fd6_emit_single_plane_descriptor(state, imgso->si[n].resource, + set->descriptor[slot]); } return state; } static void +fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader, + unsigned start, unsigned count, + const struct pipe_shader_buffer *buffers, + unsigned writable_bitmask) + in_dt +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[shader]; + struct fd6_descriptor_set *set = descriptor_set(ctx, shader); + + fd_set_shader_buffers(pctx, shader, start, count, buffers, writable_bitmask); + + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + unsigned slot = n + IR3_BINDLESS_SSBO_OFFSET; + struct pipe_shader_buffer *buf = &so->sb[n]; + + /* invalidate descriptor: */ + set->seqno[slot] = 0; + + if (!buf->buffer) { + clear_descriptor(set, slot); + continue; + } + + /* update descriptor: */ + validate_buffer_descriptor(ctx, set, slot, buf); + } +} + +static void fd6_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, unsigned unbind_num_trailing_slots, - const struct pipe_image_view *images) in_dt + const struct pipe_image_view *images) + in_dt { struct fd_context *ctx = fd_context(pctx); struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader]; + struct fd6_descriptor_set *set = descriptor_set(ctx, shader); fd_set_shader_images(pctx, shader, start, count, unbind_num_trailing_slots, images); - if (!images) - return; - for (unsigned i = 0; i < count; i++) { unsigned n = i + start; + unsigned slot = n + IR3_BINDLESS_IMAGE_OFFSET; struct pipe_image_view *buf = &so->si[n]; - if (!buf->resource) + /* invalidate descriptor: */ + set->seqno[slot] = 0; + + if (!buf->resource) { + clear_descriptor(set, slot); continue; + } struct fd_resource *rsc = fd_resource(buf->resource); @@ -223,11 +299,22 @@ 
fd6_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader, } else { fd6_validate_format(ctx, rsc, buf->format); } + + /* update descriptor: */ + validate_image_descriptor(ctx, set, slot, buf); + } + + for (unsigned i = 0; i < unbind_num_trailing_slots; i++) { + unsigned slot = i + start + count + IR3_BINDLESS_IMAGE_OFFSET; + + set->seqno[slot] = 0; + clear_descriptor(set, slot); } } void fd6_image_init(struct pipe_context *pctx) { + pctx->set_shader_buffers = fd6_set_shader_buffers; pctx->set_shader_images = fd6_set_shader_images; } diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 03d2300..5f9573c 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -154,7 +154,7 @@ fd_set_constant_buffer(struct pipe_context *pctx, enum pipe_shader_type shader, } } -static void +void fd_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, const struct pipe_shader_buffer *buffers, diff --git a/src/gallium/drivers/freedreno/freedreno_state.h b/src/gallium/drivers/freedreno/freedreno_state.h index 1890583..60121d2 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.h +++ b/src/gallium/drivers/freedreno/freedreno_state.h @@ -61,10 +61,16 @@ fd_depth_clamp_enabled(struct fd_context *ctx) assert_dt ctx->rasterizer->depth_clip_far); } +void fd_set_shader_buffers(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + const struct pipe_shader_buffer *buffers, + unsigned writable_bitmask) in_dt; + void fd_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, unsigned unbind_num_trailing_slots, - const struct pipe_image_view *images); + const struct pipe_image_view *images) in_dt; void fd_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *framebuffer) 
in_dt; -- 2.7.4