From 478ae974a15a729f8f3a779948603fe39e0614fc Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 20 May 2021 17:09:14 -0400 Subject: [PATCH] panfrost: Eliminate reserve_* functions We always want to reserve _something_, so reserve what we need at batch creation time and stop trying to re-reserve in a zillion places after. This has a negligible (<128 bytes per batch) increase in memory usage for compute-only workloads, but given the amount of simplification, that's a fair tradeoff. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_blit.c | 2 +- src/gallium/drivers/panfrost/pan_compute.c | 5 -- src/gallium/drivers/panfrost/pan_context.c | 25 +++------ src/gallium/drivers/panfrost/pan_job.c | 88 +++++++----------------------- src/gallium/drivers/panfrost/pan_job.h | 3 - 5 files changed, 31 insertions(+), 92 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_blit.c b/src/gallium/drivers/panfrost/pan_blit.c index 3ebce13..c49f821 100644 --- a/src/gallium/drivers/panfrost/pan_blit.c +++ b/src/gallium/drivers/panfrost/pan_blit.c @@ -298,7 +298,7 @@ panfrost_blit(struct pipe_context *pipe, mali_ptr tiler = pan_is_bifrost(dev) ? panfrost_batch_get_bifrost_tiler(batch, ~0) : 0; pan_blit(&bctx, &batch->pool, &batch->scoreboard, - panfrost_batch_reserve_tls(batch, false), tiler); + batch->tls.gpu, tiler); /* We don't want this batch to interfere with subsequent draw * calls, but we want to keep it in the list of pending batches diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index ad473b3..a93a3ea 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -103,11 +103,6 @@ panfrost_launch_grid(struct pipe_context *pipe, struct panfrost_device *dev = pan_device(pipe->screen); struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - /* Reserve a thread storage descriptor now (will be emitted at submit - * time). 
- */ - panfrost_batch_reserve_tls(batch, true); - ctx->compute_grid = info; struct panfrost_ptr t = diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 238bca7..186b2d5 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -227,8 +227,7 @@ static void panfrost_draw_emit_vertex(struct panfrost_batch *batch, const struct pipe_draw_info *info, void *invocation_template, - mali_ptr shared_mem, mali_ptr vs_vary, - mali_ptr varyings, + mali_ptr vs_vary, mali_ptr varyings, mali_ptr attribs, mali_ptr attrib_bufs, void *job) { @@ -252,7 +251,7 @@ panfrost_draw_emit_vertex(struct panfrost_batch *batch, cfg.attribute_buffers = attrib_bufs; cfg.varyings = vs_vary; cfg.varying_buffers = vs_vary ? varyings : 0; - cfg.thread_storage = shared_mem; + cfg.thread_storage = batch->tls.gpu; pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX); } @@ -357,8 +356,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch, const struct pipe_draw_info *info, const struct pipe_draw_start_count_bias *draw, void *invocation_template, - mali_ptr shared_mem, mali_ptr indices, - mali_ptr fs_vary, mali_ptr varyings, + mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings, mali_ptr pos, mali_ptr psiz, void *job) { struct panfrost_context *ctx = batch->ctx; @@ -438,7 +436,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch, cfg.viewport = batch->viewport; cfg.varyings = fs_vary; cfg.varying_buffers = fs_vary ? 
varyings : 0; - cfg.thread_storage = shared_mem; + cfg.thread_storage = batch->tls.gpu; /* For all primitives but lines DRAW.flat_shading_vertex must * be set to 0 and the provoking vertex is selected with the @@ -516,8 +514,6 @@ panfrost_direct_draw(struct panfrost_batch *batch, unsigned vertex_count = ctx->vertex_count; - mali_ptr shared_mem = panfrost_batch_reserve_tls(batch, false); - unsigned min_index = 0, max_index = 0; mali_ptr indices = 0; @@ -571,9 +567,9 @@ panfrost_direct_draw(struct panfrost_batch *batch, attribs = panfrost_emit_vertex_data(batch, &attrib_bufs); /* Fire off the draw itself */ - panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem, + panfrost_draw_emit_vertex(batch, info, &invocation, vs_vary, varyings, attribs, attrib_bufs, vertex.cpu); - panfrost_draw_emit_tiler(batch, info, draw, &invocation, shared_mem, indices, + panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices, fs_vary, varyings, pos, psiz, tiler.cpu); panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler); @@ -603,8 +599,6 @@ panfrost_indirect_draw(struct panfrost_batch *batch, ctx->drawid = drawid_offset; ctx->indirect_draw = true; - mali_ptr shared_mem = panfrost_batch_reserve_tls(batch, false); - struct panfrost_ptr tiler = panfrost_pool_alloc_aligned(&batch->pool, pan_is_bifrost(dev) ? @@ -660,10 +654,9 @@ panfrost_indirect_draw(struct panfrost_batch *batch, static struct mali_invocation_packed invocation; /* Fire off the draw itself */ - panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem, - vs_vary, varyings, attribs, attrib_bufs, - vertex.cpu); - panfrost_draw_emit_tiler(batch, info, draw, &invocation, shared_mem, + panfrost_draw_emit_vertex(batch, info, &invocation, vs_vary, varyings, + attribs, attrib_bufs, vertex.cpu); + panfrost_draw_emit_tiler(batch, info, draw, &invocation, index_buf ? 
index_buf->ptr.gpu : 0, fs_vary, varyings, pos, psiz, tiler.cpu); diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 0bbd27b..ce2df7c 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -94,6 +94,25 @@ panfrost_batch_init(struct panfrost_context *ctx, PAN_BO_INVISIBLE, 65536, "Varyings", false, true); panfrost_batch_add_fbo_bos(batch); + + /* Reserve the framebuffer and local storage descriptors */ + batch->framebuffer = + (dev->quirks & MIDGARD_SFBD) ? + panfrost_pool_alloc_desc(&batch->pool, SINGLE_TARGET_FRAMEBUFFER) : + panfrost_pool_alloc_desc_aggregate(&batch->pool, + PAN_DESC(MULTI_TARGET_FRAMEBUFFER), + PAN_DESC(ZS_CRC_EXTENSION), + PAN_DESC_ARRAY(MAX2(key->nr_cbufs, 1), RENDER_TARGET)); + + /* Add the MFBD tag now, other tags will be added at submit-time */ + if (!(dev->quirks & MIDGARD_SFBD)) + batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD; + + /* On Midgard, the TLS is embedded in the FB descriptor */ + if (pan_is_bifrost(dev)) + batch->tls = panfrost_pool_alloc_desc(&batch->pool, LOCAL_STORAGE); + else + batch->tls = batch->framebuffer; } static void @@ -729,66 +748,6 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch, } } -static mali_ptr -panfrost_batch_reserve_framebuffer(struct panfrost_batch *batch) -{ - struct panfrost_device *dev = pan_device(batch->ctx->base.screen); - - if (batch->framebuffer.gpu) - return batch->framebuffer.gpu; - - /* If we haven't, reserve space for a framebuffer descriptor */ - - struct pan_image_view rts[8]; - struct pan_image_view zs; - struct pan_image_view s; - struct pan_fb_info fb; - - panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, true); - - unsigned zs_crc_count = pan_fbd_has_zs_crc_ext(dev, &fb) ? 1 : 0; - unsigned rt_count = MAX2(fb.rt_count, 1); - batch->framebuffer = - (dev->quirks & MIDGARD_SFBD) ? 
- panfrost_pool_alloc_desc(&batch->pool, SINGLE_TARGET_FRAMEBUFFER) : - panfrost_pool_alloc_desc_aggregate(&batch->pool, - PAN_DESC(MULTI_TARGET_FRAMEBUFFER), - PAN_DESC_ARRAY(zs_crc_count, ZS_CRC_EXTENSION), - PAN_DESC_ARRAY(rt_count, RENDER_TARGET)); - - /* Add the MFBD tag now, other tags will be added when emitting the - * FB desc. - */ - if (!(dev->quirks & MIDGARD_SFBD)) - batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD; - - return batch->framebuffer.gpu; -} - -mali_ptr -panfrost_batch_reserve_tls(struct panfrost_batch *batch, bool compute) -{ - struct panfrost_device *dev = pan_device(batch->ctx->base.screen); - - /* If we haven't, reserve space for the thread storage descriptor */ - - if (batch->tls.gpu) - return batch->tls.gpu; - - if (pan_is_bifrost(dev) || compute) { - batch->tls = panfrost_pool_alloc_desc(&batch->pool, LOCAL_STORAGE); - } else { - /* On Midgard, the FB descriptor contains a thread storage - * descriptor, and tiler jobs need more than thread storage - * info. Let's point to the FB desc in that case. - */ - panfrost_batch_reserve_framebuffer(batch); - batch->tls = batch->framebuffer; - } - - return batch->tls.gpu; -} - static void panfrost_batch_draw_wallpaper(struct panfrost_batch *batch, struct pan_fb_info *fb) @@ -962,15 +921,11 @@ panfrost_batch_submit(struct panfrost_batch *batch, if (!batch->scoreboard.first_job && !batch->clear) goto out; - if (batch->scoreboard.first_tiler || batch->clear) - panfrost_batch_reserve_framebuffer(batch); - struct pan_fb_info fb; struct pan_image_view rts[8], zs, s; panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false); - panfrost_batch_reserve_tls(batch, false); panfrost_batch_draw_wallpaper(batch, &fb); @@ -981,13 +936,12 @@ panfrost_batch_submit(struct panfrost_batch *batch, } /* Now that all draws are in, we can finally prepare the - * FBD for the batch */ + * FBD for the batch (if there is one). 
*/ panfrost_emit_tls(batch); - panfrost_emit_tile_map(batch, &fb); - if (batch->framebuffer.gpu) + if (batch->scoreboard.first_tiler || batch->clear) panfrost_emit_fbd(batch, &fb); ret = panfrost_batch_submit_jobs(batch, &fb, in_sync, out_sync); diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index fa920ec..1a6dc0b 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -189,7 +189,4 @@ panfrost_batch_intersection_scissor(struct panfrost_batch *batch, mali_ptr panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count); -mali_ptr -panfrost_batch_reserve_tls(struct panfrost_batch *batch, bool compute); - #endif -- 2.7.4