From b4df115d3f3c7e5a1f6105c3737d0fe9a80dfcf2 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Wed, 11 Sep 2019 10:02:37 -0700
Subject: [PATCH] freedreno/a6xx: pre-calculate userconst stateobj size

The AnTuTu "garden" benchmark overflows the fixed size constbuffer
stateobject, so lets be more clever and calculate (a potentially
slightly pessimistic) actual size.

Signed-off-by: Rob Clark
---
 src/freedreno/ir3/ir3_shader.h                   |  1 +
 src/gallium/drivers/freedreno/a6xx/fd6_emit.c    |  6 ++++--
 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 16 +++++++++++++++-
 src/gallium/drivers/freedreno/ir3/ir3_gallium.c  | 23 +++++++++++++++++++++++
 src/gallium/drivers/freedreno/ir3/ir3_gallium.h  |  2 ++
 5 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index f6896c3..b2d08fe 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -527,6 +527,7 @@ struct ir3_ubo_analysis_state
 	struct ir3_ubo_range range[IR3_MAX_CONSTANT_BUFFERS];
 	uint32_t size;
 	uint32_t lower_count;
+	uint32_t cmdstream_size; /* for per-gen backend to stash required cmdstream size */
 };
 
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index 4319b43..3e8d41c 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -928,7 +928,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
 
 	if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & DIRTY_CONST) {
 		struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer(
-				ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+				ctx->batch->submit, vp->shader->ubo_state.cmdstream_size,
+				FD_RINGBUFFER_STREAMING);
 
 		ir3_emit_user_consts(ctx->screen, vp, vsconstobj,
 				&ctx->constbuf[PIPE_SHADER_VERTEX]);
@@ -940,7 +941,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
 
 	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) {
 		struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer(
-				ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+				ctx->batch->submit, fp->shader->ubo_state.cmdstream_size,
+				FD_RINGBUFFER_STREAMING);
 
 		ir3_emit_user_consts(ctx->screen, fp, fsconstobj,
 				&ctx->constbuf[PIPE_SHADER_FRAGMENT]);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index a2acaa7..8931d40 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -45,7 +45,21 @@ create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state
 {
 	struct fd_context *ctx = fd_context(pctx);
 	struct ir3_compiler *compiler = ctx->screen->compiler;
-	return ir3_shader_create(compiler, cso, type, &ctx->debug, pctx->screen);
+	struct ir3_shader *shader =
+		ir3_shader_create(compiler, cso, type, &ctx->debug, pctx->screen);
+	unsigned packets, size;
+
+	/* pre-calculate size required for userconst stateobj: */
+	ir3_user_consts_size(&shader->ubo_state, &packets, &size);
+
+	/* also account for UBO addresses (one extra packet, 2 dwords per UBO): */
+	packets += 1;
+	size += 2 * shader->const_state.num_ubos;
+
+	unsigned sizedwords = (4 * packets) + size; /* 4 dwords budgeted per packet, plus payload */
+	shader->ubo_state.cmdstream_size = sizedwords * 4; /* dword count * 4, ie. bytes */
+
+	return shader;
 }
 
 static void *
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index b72e6c7..7456a82 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -211,6 +211,29 @@ emit_const(struct fd_screen *screen, struct fd_ringbuffer *ring,
 			offset, size, user_buffer, buffer);
 }
 
+/**
+ * Indirectly calculates size of cmdstream needed for ir3_emit_user_consts().
+ * Outputs: *packets = count of non-empty ranges, *size = sum of their sizes.
+ *
+ * The value can be a worst-case, ie. some shader variants may not read all
+ * consts, etc.
+ *
+ * Size is meant to be in dwords. NOTE(review): confirm units of start/end.
+ */
+void
+ir3_user_consts_size(struct ir3_ubo_analysis_state *state,
+		unsigned *packets, unsigned *size)
+{
+	*packets = *size = 0;
+
+	for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
+		if (state->range[i].start < state->range[i].end) {
+			*size += state->range[i].end - state->range[i].start;
+			(*packets)++;
+		}
+	}
+}
+
 void
 ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
 		struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.h b/src/gallium/drivers/freedreno/ir3/ir3_gallium.h
index cc8ae45..b0cf7ed 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.h
@@ -51,6 +51,8 @@ struct fd_constbuf_stateobj;
 struct fd_shaderbuf_stateobj;
 struct fd_shaderimg_stateobj;
 
+void ir3_user_consts_size(struct ir3_ubo_analysis_state *state,
+		unsigned *packets, unsigned *size);
 void ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
 		struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf);
 void ir3_emit_ubos(struct fd_screen *screen, const struct ir3_shader_variant *v,
-- 
2.7.4