From 5ac0c2c5c949bd1e30b3da4d530be0e1f6d7968b Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 14 Oct 2020 12:37:01 -0400 Subject: [PATCH] zink: always do full-fb clears in renderpass begin when possible Previously, if any of the pending clears required an explicit clear, then all of them were performed explicitly. With this patch, the first pending clear is shifted into the renderpass begin when possible, and the remaining clears are then applied on top of it, in order to reduce GPU operations. Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_clear.c | 19 +++++++++++++------ src/gallium/drivers/zink/zink_clear.h | 9 +++++++++ src/gallium/drivers/zink/zink_context.c | 25 ++++++++++++++----------- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/zink/zink_clear.c b/src/gallium/drivers/zink/zink_clear.c index bd767e3..c4b5d77 100644 --- a/src/gallium/drivers/zink/zink_clear.c +++ b/src/gallium/drivers/zink/zink_clear.c @@ -267,7 +267,7 @@ zink_clear_framebuffer(struct zink_context *ctx, unsigned clear_buffers) if (num_clears != zink_fb_clear_count(fb_clear)) goto out; /* compare all the clears to determine if we can batch these buffers together */ - for (int j = 0; j < num_clears; j++) { + for (int j = !zink_fb_clear_first_needs_explicit(fb_clear); j < num_clears; j++) { struct zink_framebuffer_clear_data *a = zink_fb_clear_element(color_clear, j); struct zink_framebuffer_clear_data *b = zink_fb_clear_element(fb_clear, j); /* scissors don't match, fire this one off */ @@ -292,7 +292,7 @@ zink_clear_framebuffer(struct zink_context *ctx, unsigned clear_buffers) if (num_clears != zink_fb_clear_count(fb_clear)) goto out; /* compare all the clears to determine if we can batch these buffers together */ - for (int j = 0; j < zink_fb_clear_count(color_clear); j++) { + for (int j = !zink_fb_clear_first_needs_explicit(fb_clear); j < zink_fb_clear_count(color_clear); j++) { struct zink_framebuffer_clear_data *a = 
zink_fb_clear_element(color_clear, j); struct zink_framebuffer_clear_data *b = zink_fb_clear_element(fb_clear, j); /* scissors don't match, fire this one off */ @@ -307,7 +307,7 @@ zink_clear_framebuffer(struct zink_context *ctx, unsigned clear_buffers) out: if (to_clear) { if (num_clears) { - for (int j = 0; j < num_clears; j++) { + for (int j = !zink_fb_clear_first_needs_explicit(color_clear); j < num_clears; j++) { struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(color_clear, j); struct zink_framebuffer_clear_data *zsclear = NULL; if (zs_clear) @@ -319,7 +319,7 @@ out: zsclear ? zsclear->zs.stencil : 0); } } else { - for (int j = 0; j < zink_fb_clear_count(zs_clear); j++) { + for (int j = !zink_fb_clear_first_needs_explicit(zs_clear); j < zink_fb_clear_count(zs_clear); j++) { struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(zs_clear, j); zink_clear(&ctx->base, to_clear, clear->has_scissor ? &clear->scissor : NULL, @@ -408,8 +408,15 @@ zink_fb_clear_needs_explicit(struct zink_framebuffer_clear *fb_clear) { if (zink_fb_clear_count(fb_clear) != 1) return true; - struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); - return clear->has_scissor || clear->conditional; + return zink_fb_clear_element_needs_explicit(zink_fb_clear_element(fb_clear, 0)); +} + +bool +zink_fb_clear_first_needs_explicit(struct zink_framebuffer_clear *fb_clear) +{ + if (!zink_fb_clear_count(fb_clear)) + return false; + return zink_fb_clear_element_needs_explicit(zink_fb_clear_element(fb_clear, 0)); } static void diff --git a/src/gallium/drivers/zink/zink_clear.h b/src/gallium/drivers/zink/zink_clear.h index 34bc015..ca5547f 100644 --- a/src/gallium/drivers/zink/zink_clear.h +++ b/src/gallium/drivers/zink/zink_clear.h @@ -70,6 +70,9 @@ zink_clear_texture(struct pipe_context *ctx, bool zink_fb_clear_needs_explicit(struct zink_framebuffer_clear *fb_clear); +bool +zink_fb_clear_first_needs_explicit(struct zink_framebuffer_clear 
*fb_clear); + void zink_clear_framebuffer(struct zink_context *ctx, unsigned clear_buffers); @@ -92,6 +95,12 @@ zink_fb_clear_reset(struct zink_framebuffer_clear *fb_clear) fb_clear->enabled = false; } +static inline bool +zink_fb_clear_element_needs_explicit(struct zink_framebuffer_clear_data *clear) +{ + return clear->has_scissor || clear->conditional; +} + void zink_clear_apply_conditionals(struct zink_context *ctx); diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 3d7ed35..bacaca6 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -740,7 +740,7 @@ get_render_pass(struct zink_context *ctx) state.rts[i].format = zink_get_format(screen, surf->format); state.rts[i].samples = surf->texture->nr_samples > 0 ? surf->texture->nr_samples : VK_SAMPLE_COUNT_1_BIT; - state.rts[i].clear_color = ctx->fb_clears[i].enabled && !zink_fb_clear_needs_explicit(&ctx->fb_clears[i]); + state.rts[i].clear_color = ctx->fb_clears[i].enabled && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]); clears |= !!state.rts[i].clear_color ? BITFIELD_BIT(i) : 0; } else { state.rts[i].format = VK_FORMAT_R8_UINT; @@ -756,10 +756,10 @@ get_render_pass(struct zink_context *ctx) state.rts[fb->nr_cbufs].format = zsbuf->format; state.rts[fb->nr_cbufs].samples = zsbuf->base.nr_samples > 0 ? zsbuf->base.nr_samples : VK_SAMPLE_COUNT_1_BIT; state.rts[fb->nr_cbufs].clear_color = fb_clear->enabled && - !zink_fb_clear_needs_explicit(fb_clear) && + !zink_fb_clear_first_needs_explicit(fb_clear) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH); state.rts[fb->nr_cbufs].clear_stencil = fb_clear->enabled && - !zink_fb_clear_needs_explicit(fb_clear) && + !zink_fb_clear_first_needs_explicit(fb_clear) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_STENCIL); clears |= state.rts[fb->nr_cbufs].clear_color || state.rts[fb->nr_cbufs].clear_stencil ? 
BITFIELD_BIT(fb->nr_cbufs) : 0;; state.num_rts++; @@ -869,12 +869,14 @@ zink_begin_render_pass(struct zink_context *ctx, struct zink_batch *batch) if (!fb_state->cbufs[i] || !ctx->fb_clears[i].enabled) continue; /* these need actual clear calls inside the rp */ + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0); if (zink_fb_clear_needs_explicit(&ctx->fb_clears[i])) { clear_buffers |= (PIPE_CLEAR_COLOR0 << i); - continue; + if (zink_fb_clear_count(&ctx->fb_clears[i]) < 2 || + zink_fb_clear_element_needs_explicit(clear)) + continue; } - /* we now know there's only one clear */ - struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(&ctx->fb_clears[i], 0); + /* we now know there's one clear that can be done here */ if (clear->color.srgb) { clears[i].color.float32[0] = util_format_srgb_to_linear_float(clear->color.color.f[0]); clears[i].color.float32[1] = util_format_srgb_to_linear_float(clear->color.color.f[1]); @@ -891,17 +893,18 @@ zink_begin_render_pass(struct zink_context *ctx, struct zink_batch *batch) } if (fb_state->zsbuf && ctx->fb_clears[PIPE_MAX_COLOR_BUFS].enabled) { struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; - if (zink_fb_clear_needs_explicit(fb_clear)) { - for (int j = 0; j < zink_fb_clear_count(fb_clear); j++) - clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits; - } else { - struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); + struct zink_framebuffer_clear_data *clear = zink_fb_clear_element(fb_clear, 0); + if (!zink_fb_clear_element_needs_explicit(clear)) { clears[fb_state->nr_cbufs].depthStencil.depth = clear->zs.depth; clears[fb_state->nr_cbufs].depthStencil.stencil = clear->zs.stencil; rpbi.clearValueCount = fb_state->nr_cbufs + 1; clear_validate |= BITFIELD_BIT(fb_state->nr_cbufs); assert(ctx->framebuffer->rp->state.clears); } + if (zink_fb_clear_needs_explicit(fb_clear)) { + for (int j = 
!zink_fb_clear_element_needs_explicit(clear); j < zink_fb_clear_count(fb_clear); j++) + clear_buffers |= zink_fb_clear_element(fb_clear, j)->zs.bits; + } } assert(clear_validate == ctx->framebuffer->rp->state.clears); rpbi.pClearValues = &clears[0]; -- 2.7.4