From f02a64dbdd2ec147167ad60357bd46d8d964290a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 27 Jun 2016 11:28:37 -0400 Subject: [PATCH] freedreno: move more batch related tracking to fd_batch To flush batches out of order, the gmem code needs to not depend on state from fd_context (since that may apply to a more recent batch). So this all moves into batch. The one exception is the gmem/pipe/tile state itself. But this is only used from gmem code (and batches are flushed serially). The alternative would be having to re-calculate GMEM layout on every batch, even if the dimensions of the render targets are the same. Note: This opens up the possibility of pushing gmem/submit into a helper thread. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a2xx/fd2_draw.c | 6 +- src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 8 +- src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 63 ++++----- src/gallium/drivers/freedreno/a3xx/fd3_context.c | 4 - src/gallium/drivers/freedreno/a3xx/fd3_context.h | 5 - src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 15 +-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 15 ++- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 145 +++++++++++---------- src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 11 +- src/gallium/drivers/freedreno/a4xx/fd4_draw.h | 13 +- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 16 +-- src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 121 ++++++++--------- src/gallium/drivers/freedreno/freedreno_batch.c | 22 +++- src/gallium/drivers/freedreno/freedreno_batch.h | 66 ++++++++++ src/gallium/drivers/freedreno/freedreno_context.c | 19 +-- src/gallium/drivers/freedreno/freedreno_context.h | 77 ++--------- src/gallium/drivers/freedreno/freedreno_draw.c | 82 ++++++------ src/gallium/drivers/freedreno/freedreno_draw.h | 15 ++- src/gallium/drivers/freedreno/freedreno_gmem.c | 96 +++++++------- src/gallium/drivers/freedreno/freedreno_gmem.h | 6 +- src/gallium/drivers/freedreno/freedreno_query_hw.c | 4 +- 
src/gallium/drivers/freedreno/freedreno_resource.c | 3 +- src/gallium/drivers/freedreno/freedreno_state.c | 6 +- 23 files changed, 420 insertions(+), 398 deletions(-) diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c index 030e6f6..a824018 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c @@ -107,7 +107,7 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */ OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */ - fd_draw_emit(ctx, ring, ctx->primtypes[info->mode], + fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode], IGNORE_VISIBILITY, info); OUT_PKT3(ring, CP_SET_CONSTANT, 2); @@ -126,7 +126,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers, { struct fd2_context *fd2_ctx = fd2_context(ctx); struct fd_ringbuffer *ring = ctx->batch->draw; - struct pipe_framebuffer_state *fb = &ctx->framebuffer; + struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer; uint32_t reg, colr = 0; if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs) @@ -266,7 +266,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers, OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); OUT_PKT3(ring, CP_SET_CONSTANT, 2); diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c index 0327803..b3a1b3d 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c @@ -250,10 +250,10 @@ fd2_emit_state(struct fd_context *ctx, uint32_t dirty) OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */ scissor->maxy)); - ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx); - 
ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny); - ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx); - ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy); + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); } if (dirty & FD_DIRTY_VIEWPORT) { diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c index eba2cec..6dc6396 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c @@ -56,10 +56,10 @@ static uint32_t fmt2swap(enum pipe_format format) /* transfer from gmem to system memory (ie. normal RAM) */ static void -emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base, +emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, struct pipe_surface *psurf) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); uint32_t swap = fmt2swap(psurf->format); @@ -90,16 +90,17 @@ emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base, OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd2_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) +fd2_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = 
batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) { { .prsc = fd2_ctx->solid_vertexbuf, .size = 48 }, @@ -159,11 +160,11 @@ fd2_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) | A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff)); - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_gmem2mem_surf(ctx, tile->bin_w * tile->bin_h, pfb->zsbuf); + if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(batch, tile->bin_w * tile->bin_h, pfb->zsbuf); - if (ctx->resolve & FD_BUFFER_COLOR) - emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]); + if (batch->resolve & FD_BUFFER_COLOR) + emit_gmem2mem_surf(batch, 0, pfb->cbufs[0]); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL)); @@ -173,10 +174,10 @@ fd2_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) /* transfer from system memory to gmem */ static void -emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, +emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base, struct pipe_surface *psurf) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); uint32_t swiz; @@ -212,16 +213,17 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) +fd2_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = 
&ctx->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; unsigned bin_w = tile->bin_w; unsigned bin_h = tile->bin_h; float x0, y0, x1, y1; @@ -317,26 +319,27 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL)); OUT_RING(ring, 0x00000000); - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_mem2gmem_surf(ctx, bin_w * bin_h, pfb->zsbuf); + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) + emit_mem2gmem_surf(batch, bin_w * bin_h, pfb->zsbuf); - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) - emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0]); + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) + emit_mem2gmem_surf(batch, 0, pfb->cbufs[0]); /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */ } /* before first tile */ static void -fd2_emit_tile_init(struct fd_context *ctx) +fd2_emit_tile_init(struct fd_batch *batch) { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd_gmem_stateobj *gmem = &ctx->gmem; enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); uint32_t reg; - fd2_emit_restore(ctx, ctx->ring); + fd2_emit_restore(ctx, ring); OUT_PKT3(ring, CP_SET_CONSTANT, 4); OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); @@ -351,10 +354,10 @@ fd2_emit_tile_init(struct fd_context *ctx) /* before mem2gmem */ static void -fd2_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) +fd2_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = 
&batch->framebuffer; enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); OUT_PKT3(ring, CP_SET_CONSTANT, 2); @@ -373,10 +376,10 @@ fd2_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) /* before IB to rendering cmds: */ static void -fd2_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) +fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); OUT_PKT3(ring, CP_SET_CONSTANT, 2); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index e47bbff..2ffc2e0 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -43,8 +43,6 @@ fd3_context_destroy(struct pipe_context *pctx) { struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx)); - util_dynarray_fini(&fd3_ctx->rbrc_patches); - fd_bo_del(fd3_ctx->vs_pvt_mem); fd_bo_del(fd3_ctx->fs_pvt_mem); fd_bo_del(fd3_ctx->vsc_size_mem); @@ -127,8 +125,6 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) if (!pctx) return NULL; - util_dynarray_init(&fd3_ctx->rbrc_patches); - fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h index b4c2ebe5..0735411 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h @@ -41,11 +41,6 @@ struct fd3_context { struct fd_context base; - /* Keep track of writes to RB_RENDER_CONTROL which need to be patched - * once we know whether or not to use GMEM, and GMEM tile pitch. 
- */ - struct util_dynarray rbrc_patches; - struct fd_bo *vs_pvt_mem, *fs_pvt_mem; /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 0593b25..34d782b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -84,8 +84,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, (info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; - fd_draw_emit(ctx, ring, - primtype, + fd_draw_emit(ctx->batch, ring, primtype, emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info); } @@ -223,7 +222,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty) fd3_emit_state(ctx, ring, &emit); fd3_emit_vertex_bufs(ring, &emit); - reset_viewport(ring, &ctx->framebuffer); + reset_viewport(ring, &ctx->batch->framebuffer); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | @@ -240,7 +239,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty) fd_event_write(ctx, ring, PERFCOUNTER_STOP); - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); } @@ -249,7 +248,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct fd3_context *fd3_ctx = fd3_context(ctx); - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; struct fd_ringbuffer *ring = ctx->batch->draw; unsigned dirty = ctx->dirty; unsigned i; @@ -270,7 +269,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, /* emit generic state now: */ fd3_emit_state(ctx, ring, &emit); - reset_viewport(ring, &ctx->framebuffer); + reset_viewport(ring, &ctx->batch->framebuffer); OUT_PKT0(ring, 
REG_A3XX_RB_BLEND_ALPHA, 1); OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) | @@ -278,7 +277,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER), - &fd3_ctx->rbrc_patches); + &ctx->batch->rbrc_patches); if (buffers & PIPE_CLEAR_DEPTH) { OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); @@ -374,7 +373,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, fd_event_write(ctx, ring, PERFCOUNTER_STOP); - fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY, + fd_draw(ctx->batch, ring, DI_PT_RECTLIST, USE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 45185ed..eef5b52 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -516,7 +516,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, */ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); - OUT_RINGP(ring, val, &fd3_context(ctx)->rbrc_patches); + OUT_RINGP(ring, val, &ctx->batch->rbrc_patches); } if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { @@ -631,10 +631,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) | A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1)); - ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx); - ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny); - ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx); - ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy); + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + 
ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); } if (dirty & FD_DIRTY_VIEWPORT) { @@ -649,7 +649,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; int nr_cbufs = pfb->nr_cbufs; if (fd3_blend_stateobj(ctx->blend)->rb_render_control & A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE) @@ -673,7 +673,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, uint32_t i; for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) { - enum pipe_format format = pipe_surface_format(ctx->framebuffer.cbufs[i]); + enum pipe_format format = + pipe_surface_format(ctx->batch->framebuffer.cbufs[i]); const struct util_format_description *desc = util_format_description(format); bool is_float = util_format_is_float(format); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 2449a84..b9af456 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -128,9 +128,9 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } static bool -use_hw_binning(struct fd_context *ctx) +use_hw_binning(struct fd_batch *batch) { - struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; /* workaround: combining scissor optimization and hw binning * seems problematic. Seems like we end up with a mismatch @@ -153,13 +153,14 @@ use_hw_binning(struct fd_context *ctx) } /* workaround for (hlsq?) 
lockup with hw binning on a3xx patchlevel 0 */ -static void update_vsc_pipe(struct fd_context *ctx); +static void update_vsc_pipe(struct fd_batch *batch); static void -emit_binning_workaround(struct fd_context *ctx) +emit_binning_workaround(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd3_emit emit = { .debug = &ctx->debug, .vtx = &fd3_ctx->solid_vbuf_state, @@ -308,12 +309,12 @@ emit_binning_workaround(struct fd_context *ctx) /* transfer from gmem to system memory (ie. normal RAM) */ static void -emit_gmem2mem_surf(struct fd_context *ctx, +emit_gmem2mem_surf(struct fd_batch *batch, enum adreno_rb_copy_control_mode mode, bool stencil, uint32_t base, struct pipe_surface *psurf) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); enum pipe_format format = psurf->format; if (stencil) { @@ -342,16 +343,17 @@ emit_gmem2mem_surf(struct fd_context *ctx, A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format))); - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd3_context *fd3_ctx = fd3_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd3_emit emit = { .debug = &ctx->debug, .vtx = &fd3_ctx->solid_vbuf_state, @@ -437,23 +439,23 @@ 
fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); - if (!rsc->stencil || ctx->resolve & FD_BUFFER_DEPTH) - emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, false, + if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH) + emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf); - if (rsc->stencil && ctx->resolve & FD_BUFFER_STENCIL) - emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, true, + if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL) + emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf); } - if (ctx->resolve & FD_BUFFER_COLOR) { + if (batch->resolve & FD_BUFFER_COLOR) { for (i = 0; i < pfb->nr_cbufs; i++) { if (!pfb->cbufs[i]) continue; - if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) + if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, false, + emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false, ctx->gmem.cbuf_base[i], pfb->cbufs[i]); } } @@ -472,10 +474,10 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) /* transfer from system memory to gmem */ static void -emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], +emit_mem2gmem_surf(struct fd_batch *batch, uint32_t bases[], struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct pipe_surface *zsbufs[2]; assert(bufs > 0); @@ -502,7 +504,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) | A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); - OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 
* ctx->gmem.bin_w)); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->ctx->gmem.bin_w)); if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); @@ -523,17 +525,18 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], fd3_emit_gmem_restore_tex(ring, psurf, bufs); - fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd3_emit emit = { .debug = &ctx->debug, .vtx = &fd3_ctx->blit_vbuf_state, @@ -655,14 +658,14 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) bin_w = gmem->bin_w; bin_h = gmem->bin_h; - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) { + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; emit.fp = NULL; /* frag shader changed so clear cache */ fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); - emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); + emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); } - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) { /* Non-float can use a regular color write. 
It's split over 8-bit @@ -680,7 +683,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } emit.fp = NULL; /* frag shader changed so clear cache */ fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); - emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); } OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); @@ -695,34 +698,33 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } static void -patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode) +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) { unsigned i; - for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i); + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0); } - util_dynarray_resize(&ctx->draw_patches, 0); + util_dynarray_resize(&batch->draw_patches, 0); } static void -patch_rbrc(struct fd_context *ctx, uint32_t val) +patch_rbrc(struct fd_batch *batch, uint32_t val) { - struct fd3_context *fd3_ctx = fd3_context(ctx); unsigned i; - for (i = 0; i < fd_patch_num_elements(&fd3_ctx->rbrc_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&fd3_ctx->rbrc_patches, i); + for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i); *patch->cs = patch->val | val; } - util_dynarray_resize(&fd3_ctx->rbrc_patches, 0); + util_dynarray_resize(&batch->rbrc_patches, 0); } /* for rendering directly to system memory: */ static void -fd3_emit_sysmem_prep(struct fd_context *ctx) +fd3_emit_sysmem_prep(struct fd_batch *batch) { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = 
&batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; uint32_t i, pitch = 0; for (i = 0; i < pfb->nr_cbufs; i++) { @@ -732,7 +734,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch; } - fd3_emit_restore(ctx, ring); + fd3_emit_restore(batch->ctx, ring); OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1); OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | @@ -757,15 +759,16 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); - patch_draws(ctx, IGNORE_VISIBILITY); - patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); + patch_draws(batch, IGNORE_VISIBILITY); + patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); } static void -update_vsc_pipe(struct fd_context *ctx) +update_vsc_pipe(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd3_context *fd3_ctx = fd3_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; int i; OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); @@ -790,12 +793,12 @@ update_vsc_pipe(struct fd_context *ctx) } static void -emit_binning_pass(struct fd_context *ctx) +emit_binning_pass(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_batch *batch = ctx->batch; - struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; int i; uint32_t x1 = gmem->minx; @@ -804,7 +807,7 @@ emit_binning_pass(struct fd_context *ctx) uint32_t y2 = gmem->miny + gmem->height - 1; if (ctx->screen->gpu_id == 320) { - emit_binning_workaround(ctx); + emit_binning_workaround(batch); fd_wfi(ctx, ring); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_RING(ring, 0x00007fff); @@ -912,19 +915,19 @@ 
emit_binning_pass(struct fd_context *ctx) fd_wfi(ctx, ring); if (ctx->screen->gpu_id == 320) { - emit_binning_workaround(ctx); + emit_binning_workaround(batch); } } /* before first tile */ static void -fd3_emit_tile_init(struct fd_context *ctx) +fd3_emit_tile_init(struct fd_batch *batch) { - struct fd_ringbuffer *ring = ctx->ring; - struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_ringbuffer *ring = batch->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; uint32_t rb_render_control; - fd3_emit_restore(ctx, ring); + fd3_emit_restore(batch->ctx, ring); /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated * at the right and bottom edge tiles @@ -933,29 +936,30 @@ fd3_emit_tile_init(struct fd_context *ctx) OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); - update_vsc_pipe(ctx); + update_vsc_pipe(batch); - if (use_hw_binning(ctx)) { + if (use_hw_binning(batch)) { /* emit hw binning pass: */ - emit_binning_pass(ctx); + emit_binning_pass(batch); - patch_draws(ctx, USE_VISIBILITY); + patch_draws(batch, USE_VISIBILITY); } else { - patch_draws(ctx, IGNORE_VISIBILITY); + patch_draws(batch, IGNORE_VISIBILITY); } rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w); - patch_rbrc(ctx, rb_render_control); + patch_rbrc(batch, rb_render_control); } /* before mem2gmem */ static void -fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; if (ctx->needs_rb_fbd) { fd_wfi(ctx, ring); @@ -973,12 +977,13 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) /* before IB to rendering cmds: */ static void -fd3_emit_tile_renderprep(struct 
fd_context *ctx, struct fd_tile *tile) +fd3_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd3_context *fd3_ctx = fd3_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; uint32_t x1 = tile->xoff; uint32_t y1 = tile->yoff; @@ -1005,7 +1010,7 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0x00000000); } - if (use_hw_binning(ctx)) { + if (use_hw_binning(batch)) { struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; assert(pipe->w * pipe->h); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index e051386..8270c4f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -68,8 +68,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, (info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; - fd4_draw_emit(ctx, ring, - primtype, + fd4_draw_emit(ctx->batch, ring, primtype, emit->key.binning_pass ? 
IGNORE_VISIBILITY : USE_VISIBILITY, info); } @@ -233,7 +232,7 @@ fd4_clear_binning(struct fd_context *ctx, unsigned dirty) fd4_emit_state(ctx, ring, &emit); fd4_emit_vertex_bufs(ring, &emit); - reset_viewport(ring, &ctx->framebuffer); + reset_viewport(ring, &ctx->batch->framebuffer); OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2); OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_VAROUT(0) | @@ -244,7 +243,7 @@ fd4_clear_binning(struct fd_context *ctx, unsigned dirty) OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, 0x00000002); - fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd4_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL); } @@ -254,7 +253,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, { struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd_ringbuffer *ring = ctx->batch->draw; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; unsigned dirty = ctx->dirty; unsigned i; @@ -390,7 +389,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, OUT_PKT3(ring, CP_UNKNOWN_1A, 1); OUT_RING(ring, 0x00000001); - fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY, + fd4_draw(ctx->batch, ring, DI_PT_RECTLIST, USE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL); OUT_PKT3(ring, CP_UNKNOWN_1A, 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h index 5f5f6cd..57b1687 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h @@ -48,7 +48,7 @@ static inline uint32_t DRAW4(enum pc_di_primtype prim_type, } static inline void -fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, +fd4_draw(struct fd_batch *batch, struct fd_ringbuffer *ring, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, enum pc_di_src_sel src_sel, 
uint32_t count, @@ -70,7 +70,7 @@ fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, * we know if we are binning or not */ OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0), - &ctx->draw_patches); + &batch->draw_patches); } else { OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode)); } @@ -84,7 +84,7 @@ fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, emit_marker(ring, 7); - fd_reset_wfi(ctx); + fd_reset_wfi(batch->ctx); } @@ -101,18 +101,19 @@ fd4_size2indextype(unsigned index_size) return INDEX4_SIZE_32_BIT; } static inline void -fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, +fd4_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, const struct pipe_draw_info *info) { - struct pipe_index_buffer *idx = &ctx->indexbuf; struct pipe_resource *idx_buffer = NULL; enum a4xx_index_size idx_type; enum pc_di_src_sel src_sel; uint32_t idx_size, idx_offset; if (info->indexed) { + struct pipe_index_buffer *idx = &batch->ctx->indexbuf; + assert(!idx->user_buffer); idx_buffer = idx->buffer; @@ -128,7 +129,7 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, src_sel = DI_SRC_SEL_AUTO_INDEX; } - fd4_draw(ctx, ring, primtype, vismode, src_sel, + fd4_draw(batch, ring, primtype, vismode, src_sel, info->count, info->instance_count, idx_type, idx_size, idx_offset, idx_buffer); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 5bb712c..88e1a40 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -505,7 +505,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, emit_marker(ring, 5); if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; unsigned char 
mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { @@ -525,7 +525,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; uint32_t rb_alpha_control = zsa->rb_alpha_control; if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) @@ -625,10 +625,10 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) | A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny)); - ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx); - ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny); - ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx); - ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy); + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); } if (dirty & FD_DIRTY_VIEWPORT) { @@ -643,7 +643,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; unsigned n = pfb->nr_cbufs; /* if we have depth/stencil, we need at least on MRT: */ if (pfb->zsbuf) @@ -663,7 +663,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { enum pipe_format format = pipe_surface_format( - ctx->framebuffer.cbufs[i]); + 
ctx->batch->framebuffer.cbufs[i]); bool is_int = util_format_is_pure_integer(format); bool has_alpha = util_format_has_alpha(format); uint32_t control = blend->rb_mrt[i].control; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 524c35a..afd37a8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -132,10 +132,10 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } static bool -use_hw_binning(struct fd_context *ctx) +use_hw_binning(struct fd_batch *batch) { - struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; /* this seems to be a hw bug.. but this hack fixes piglit fbo-maxsize: */ if ((pfb->width > 4096) && (pfb->height > 4096)) @@ -147,10 +147,10 @@ use_hw_binning(struct fd_context *ctx) /* transfer from gmem to system memory (ie. 
normal RAM) */ static void -emit_gmem2mem_surf(struct fd_context *ctx, bool stencil, +emit_gmem2mem_surf(struct fd_batch *batch, bool stencil, uint32_t base, struct pipe_surface *psurf) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); enum pipe_format pformat = psurf->format; struct fd_resource_slice *slice; @@ -180,17 +180,18 @@ emit_gmem2mem_surf(struct fd_context *ctx, bool stencil, A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat))); - fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) +fd4_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd4_emit emit = { .debug = &ctx->debug, .vtx = &fd4_ctx->solid_vbuf_state, @@ -269,22 +270,22 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) fd4_program_emit(ring, &emit, 0, NULL); fd4_emit_vertex_bufs(ring, &emit); - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); - if (!rsc->stencil || (ctx->resolve & FD_BUFFER_DEPTH)) - emit_gmem2mem_surf(ctx, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf); - if (rsc->stencil && (ctx->resolve & FD_BUFFER_STENCIL)) - emit_gmem2mem_surf(ctx, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf); + if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) + 
emit_gmem2mem_surf(batch, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf); + if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(batch, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf); } - if (ctx->resolve & FD_BUFFER_COLOR) { + if (batch->resolve & FD_BUFFER_COLOR) { unsigned i; for (i = 0; i < pfb->nr_cbufs; i++) { if (!pfb->cbufs[i]) continue; - if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) + if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_gmem2mem_surf(ctx, false, gmem->cbuf_base[i], pfb->cbufs[i]); + emit_gmem2mem_surf(batch, false, gmem->cbuf_base[i], pfb->cbufs[i]); } } @@ -298,10 +299,10 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) /* transfer from system memory to gmem */ static void -emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases, +emit_mem2gmem_surf(struct fd_batch *batch, uint32_t *bases, struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w) { - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct pipe_surface *zsbufs[2]; emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false); @@ -318,17 +319,18 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases, fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs); - fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL); } static void -fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) +fd4_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd4_emit emit = { .debug = &ctx->debug, .vtx = &fd4_ctx->blit_vbuf_state, 
@@ -455,14 +457,14 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) bin_w = gmem->bin_w; bin_h = gmem->bin_h; - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) { + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; emit.fp = NULL; /* frag shader changed so clear cache */ fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); - emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); + emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); } - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { switch (pfb->zsbuf->format) { case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: case PIPE_FORMAT_Z32_FLOAT: @@ -493,7 +495,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } emit.fp = NULL; /* frag shader changed so clear cache */ fd4_program_emit(ring, &emit, 1, &pfb->zsbuf); - emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); } OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); @@ -508,24 +510,24 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } static void -patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode) +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) { unsigned i; - for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i); + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); *patch->cs = patch->val | DRAW4(0, 0, 0, vismode); } - util_dynarray_resize(&ctx->draw_patches, 0); + util_dynarray_resize(&batch->draw_patches, 0); } /* for rendering directly to system memory: */ static void -fd4_emit_sysmem_prep(struct 
fd_context *ctx) +fd4_emit_sysmem_prep(struct fd_batch *batch) { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; - fd4_emit_restore(ctx, ring); + fd4_emit_restore(batch->ctx, ring); OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1); OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | @@ -552,14 +554,15 @@ fd4_emit_sysmem_prep(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1); OUT_RING(ring, 0x8); - patch_draws(ctx, IGNORE_VISIBILITY); + patch_draws(batch, IGNORE_VISIBILITY); } static void -update_vsc_pipe(struct fd_context *ctx) +update_vsc_pipe(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd4_context *fd4_ctx = fd4_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; int i; OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1); @@ -592,12 +595,12 @@ update_vsc_pipe(struct fd_context *ctx) } static void -emit_binning_pass(struct fd_context *ctx) +emit_binning_pass(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_batch *batch = ctx->batch; - struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; int i; uint32_t x1 = gmem->minx; @@ -658,20 +661,20 @@ emit_binning_pass(struct fd_context *ctx) /* before first tile */ static void -fd4_emit_tile_init(struct fd_context *ctx) +fd4_emit_tile_init(struct fd_batch *batch) { - struct fd_ringbuffer *ring = ctx->ring; - struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_ringbuffer *ring = batch->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; - fd4_emit_restore(ctx, ring); + fd4_emit_restore(batch->ctx, ring); OUT_PKT0(ring, 
REG_A4XX_VSC_BIN_SIZE, 1); OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); - update_vsc_pipe(ctx); + update_vsc_pipe(batch); - if (use_hw_binning(ctx)) { + if (use_hw_binning(batch)) { OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) | A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h)); @@ -682,11 +685,11 @@ fd4_emit_tile_init(struct fd_context *ctx) 0x8); /* emit hw binning pass: */ - emit_binning_pass(ctx); + emit_binning_pass(batch); - patch_draws(ctx, USE_VISIBILITY); + patch_draws(batch, USE_VISIBILITY); } else { - patch_draws(ctx, IGNORE_VISIBILITY); + patch_draws(batch, IGNORE_VISIBILITY); } OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1); @@ -697,10 +700,11 @@ fd4_emit_tile_init(struct fd_context *ctx) /* before mem2gmem */ static void -fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) +fd4_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd_gmem_stateobj *gmem = &ctx->gmem; if (pfb->zsbuf) { @@ -752,19 +756,20 @@ fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) /* before IB to rendering cmds: */ static void -fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) +fd4_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; struct fd4_context *fd4_ctx = fd4_context(ctx); - struct fd_ringbuffer *ring = ctx->ring; + struct fd_ringbuffer *ring = batch->gmem; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; uint32_t x1 = tile->xoff; uint32_t y1 = tile->yoff; uint32_t x2 = tile->xoff + tile->bin_w - 1; uint32_t y2 
= tile->yoff + tile->bin_h - 1; - if (use_hw_binning(ctx)) { + if (use_hw_binning(batch)) { struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; assert(pipe->w * pipe->h); diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 6d17a42..1fbce43 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -64,16 +64,31 @@ fd_batch_create(struct fd_context *ctx) list_inithead(&batch->used_resources); + /* reset maximal bounds: */ + batch->max_scissor.minx = batch->max_scissor.miny = ~0; + batch->max_scissor.maxx = batch->max_scissor.maxy = 0; + + util_dynarray_init(&batch->draw_patches); + + if (is_a3xx(ctx->screen)) + util_dynarray_init(&batch->rbrc_patches); + return batch; } void __fd_batch_destroy(struct fd_batch *batch) { + util_copy_framebuffer_state(&batch->framebuffer, NULL); fd_ringbuffer_del(batch->draw); fd_ringbuffer_del(batch->binning); fd_ringbuffer_del(batch->gmem); + util_dynarray_fini(&batch->draw_patches); + + if (is_a3xx(batch->ctx->screen)) + util_dynarray_fini(&batch->rbrc_patches); + free(batch); } @@ -88,7 +103,12 @@ fd_batch_flush(struct fd_batch *batch) { struct fd_resource *rsc, *rsc_tmp; - fd_gmem_render_tiles(batch->ctx); + DBG("%p: needs_flush=%d", batch, batch->needs_flush); + + if (!batch->needs_flush) + return; + + fd_gmem_render_tiles(batch); /* go through all the used resources and clear their reading flag */ LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &batch->used_resources, list) { diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index 69779d8..4607250 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -42,8 +42,74 @@ enum fd_resource_status; struct fd_batch { struct pipe_reference reference; unsigned seqno; + struct fd_context *ctx; + /* do we need to mem2gmem before rendering. 
We don't, if for example, + * there was a glClear() that invalidated the entire previous buffer + * contents. Keep track of which buffer(s) are cleared, or needs + * restore. Masks of PIPE_CLEAR_* + * + * The 'cleared' bits will be set for buffers which are *entirely* + * cleared, and 'partial_cleared' bits will be set if you must + * check cleared_scissor. + */ + enum { + /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */ + FD_BUFFER_COLOR = PIPE_CLEAR_COLOR, + FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, + FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, + FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, + } cleared, partial_cleared, restore, resolve; + + bool needs_flush; + + /* To decide whether to render to system memory, keep track of the + * number of draws, and whether any of them require multisample, + * depth_test (or depth write), stencil_test, blending, and + * color_logic_Op (since those functions are disabled when by- + * passing GMEM). + */ + enum { + FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01, + FD_GMEM_DEPTH_ENABLED = 0x02, + FD_GMEM_STENCIL_ENABLED = 0x04, + + FD_GMEM_MSAA_ENABLED = 0x08, + FD_GMEM_BLEND_ENABLED = 0x10, + FD_GMEM_LOGICOP_ENABLED = 0x20, + } gmem_reason; + unsigned num_draws; /* number of draws in current batch */ + + /* Track the maximal bounds of the scissor of all the draws within a + * batch. Used at the tile rendering step (fd_gmem_render_tiles(), + * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem. + */ + struct pipe_scissor_state max_scissor; + + /* Track the cleared scissor for color/depth/stencil, so we know + * which, if any, tiles need to be restored (mem2gmem). Only valid + * if the corresponding bit in batch->cleared is set.
+ */ + struct { + struct pipe_scissor_state color, depth, stencil; + } cleared_scissor; + + /* Keep track of DRAW initiators that need to be patched up depending + * on whether we are using binning or not: + */ + struct util_dynarray draw_patches; + + /* Keep track of writes to RB_RENDER_CONTROL which need to be patched + * once we know whether or not to use GMEM, and GMEM tile pitch. + * + * (only for a3xx.. but having gen specific subclasses of fd_batch + * seemed overkill for now) + */ + struct util_dynarray rbrc_patches; + + struct pipe_framebuffer_state framebuffer; + /** draw pass cmdstream: */ struct fd_ringbuffer *draw; /** binning pass cmdstream: */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 3614370..b9a1fe9 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -45,21 +45,14 @@ void fd_context_render(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); - - DBG("needs_flush: %d", ctx->needs_flush); - - if (!ctx->needs_flush) - return; + struct fd_batch *new_batch; fd_batch_flush(ctx->batch); + new_batch = fd_batch_create(ctx); + util_copy_framebuffer_state(&new_batch->framebuffer, &ctx->batch->framebuffer); fd_batch_reference(&ctx->batch, NULL); - ctx->batch = fd_batch_create(ctx); - - ctx->needs_flush = false; - ctx->cleared = ctx->partial_cleared = ctx->restore = ctx->resolve = 0; - ctx->gmem_reason = 0; - ctx->num_draws = 0; + ctx->batch = new_batch; } static void @@ -120,8 +113,6 @@ fd_context_destroy(struct pipe_context *pctx) fd_prog_fini(pctx); fd_hw_query_fini(pctx); - util_dynarray_fini(&ctx->draw_patches); - if (ctx->blitter) util_blitter_destroy(ctx->blitter); @@ -190,8 +181,6 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, fd_reset_wfi(ctx); - util_dynarray_init(&ctx->draw_patches); - util_slab_create(&ctx->transfer_pool, sizeof(struct fd_transfer), 16, 
UTIL_SLAB_SINGLETHREADED); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 88e103e..9401367 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -199,42 +199,6 @@ struct fd_context { struct fd_program_stateobj blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen? struct fd_program_stateobj blit_z, blit_zs; - /* do we need to mem2gmem before rendering. We don't, if for example, - * there was a glClear() that invalidated the entire previous buffer - * contents. Keep track of which buffer(s) are cleared, or needs - * restore. Masks of PIPE_CLEAR_* - * - * The 'cleared' bits will be set for buffers which are *entirely* - * cleared, and 'partial_cleared' bits will be set if you must - * check cleared_scissor. - */ - enum { - /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */ - FD_BUFFER_COLOR = PIPE_CLEAR_COLOR, - FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, - FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, - FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, - } cleared, partial_cleared, restore, resolve; - - bool needs_flush; - - /* To decide whether to render to system memory, keep track of the - * number of draws, and whether any of them require multisample, - * depth_test (or depth write), stencil_test, blending, and - * color_logic_Op (since those functions are disabled when by- - * passing GMEM. - */ - enum { - FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01, - FD_GMEM_DEPTH_ENABLED = 0x02, - FD_GMEM_STENCIL_ENABLED = 0x04, - - FD_GMEM_MSAA_ENABLED = 0x08, - FD_GMEM_BLEND_ENABLED = 0x10, - FD_GMEM_LOGICOP_ENABLED = 0x20, - } gmem_reason; - unsigned num_draws; /* number of draws in current batch */ - /* Stats/counters: */ struct { @@ -244,12 +208,6 @@ struct fd_context { uint64_t batch_total, batch_sysmem, batch_gmem, batch_restore; } stats; - /* TODO get rid of this.. 
only used in gmem/tiling code paths (and - * NULL the rest of the time). Just leaving for now to reduce some - * churn.. - */ - struct fd_ringbuffer *ring; - /* Current batch.. the rule here is that you can deref ctx->batch * in codepaths from pipe_context entrypoints. But not in code- * paths from fd_batch_flush() (basically, the stuff that gets @@ -269,11 +227,6 @@ struct fd_context { * */ bool needs_rb_fbd; - /* Keep track of DRAW initiators that need to be patched up depending - * on whether we using binning or not: - */ - struct util_dynarray draw_patches; - struct pipe_scissor_state scissor; /* we don't have a disable/enable bit for scissor, so instead we keep @@ -282,22 +235,11 @@ struct fd_context { */ struct pipe_scissor_state disabled_scissor; - /* Track the maximal bounds of the scissor of all the draws within a - * batch. Used at the tile rendering step (fd_gmem_render_tiles(), - * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem. - */ - struct pipe_scissor_state max_scissor; - - /* Track the cleared scissor for color/depth/stencil, so we know - * which, if any, tiles need to be restored (mem2gmem). Only valid - * if the corresponding bit in ctx->cleared is set. - */ - struct { - struct pipe_scissor_state color, depth, stencil; - } cleared_scissor; - /* Current gmem/tiling configuration.. 
gets updated on render_tiles() * if out of date with current maximal-scissor/cpp: + * + * (NOTE: this is kind of related to the batch, but moving it there + * means we'd always have to recalc tiles every batch) */ struct fd_gmem_stateobj gmem; struct fd_vsc_pipe pipe[8]; @@ -346,7 +288,6 @@ struct fd_context { struct pipe_blend_color blend_color; struct pipe_stencil_ref stencil_ref; unsigned sample_mask; - struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple stipple; struct pipe_viewport_state viewport; struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; @@ -361,14 +302,14 @@ struct fd_context { struct pipe_debug_callback debug; /* GMEM/tile handling fxns: */ - void (*emit_tile_init)(struct fd_context *ctx); - void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile); - void (*emit_tile_mem2gmem)(struct fd_context *ctx, struct fd_tile *tile); - void (*emit_tile_renderprep)(struct fd_context *ctx, struct fd_tile *tile); - void (*emit_tile_gmem2mem)(struct fd_context *ctx, struct fd_tile *tile); + void (*emit_tile_init)(struct fd_batch *batch); + void (*emit_tile_prep)(struct fd_batch *batch, struct fd_tile *tile); + void (*emit_tile_mem2gmem)(struct fd_batch *batch, struct fd_tile *tile); + void (*emit_tile_renderprep)(struct fd_batch *batch, struct fd_tile *tile); + void (*emit_tile_gmem2mem)(struct fd_batch *batch, struct fd_tile *tile); /* optional, for GMEM bypass: */ - void (*emit_sysmem_prep)(struct fd_context *ctx); + void (*emit_sysmem_prep)(struct fd_batch *batch); /* draw: */ bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 2c76333..b947762 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -40,26 +40,27 @@ #include "freedreno_util.h" static void -resource_read(struct fd_context *ctx, struct pipe_resource *prsc) 
+resource_read(struct fd_batch *batch, struct pipe_resource *prsc) { if (!prsc) return; - fd_batch_resource_used(ctx->batch, fd_resource(prsc), FD_PENDING_READ); + fd_batch_resource_used(batch, fd_resource(prsc), FD_PENDING_READ); } static void -resource_written(struct fd_context *ctx, struct pipe_resource *prsc) +resource_written(struct fd_batch *batch, struct pipe_resource *prsc) { if (!prsc) return; - fd_batch_resource_used(ctx->batch, fd_resource(prsc), FD_PENDING_WRITE); + fd_batch_resource_used(batch, fd_resource(prsc), FD_PENDING_WRITE); } static void fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { struct fd_context *ctx = fd_context(pctx); - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_batch *batch = ctx->batch; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); unsigned i, prims, buffers = 0; @@ -89,18 +90,18 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) if (fd_depth_enabled(ctx)) { buffers |= FD_BUFFER_DEPTH; - resource_written(ctx, pfb->zsbuf->texture); - ctx->gmem_reason |= FD_GMEM_DEPTH_ENABLED; + resource_written(batch, pfb->zsbuf->texture); + batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED; } if (fd_stencil_enabled(ctx)) { buffers |= FD_BUFFER_STENCIL; - resource_written(ctx, pfb->zsbuf->texture); - ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED; + resource_written(batch, pfb->zsbuf->texture); + batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED; } if (fd_logicop_enabled(ctx)) - ctx->gmem_reason |= FD_GMEM_LOGICOP_ENABLED; + batch->gmem_reason |= FD_GMEM_LOGICOP_ENABLED; for (i = 0; i < pfb->nr_cbufs; i++) { struct pipe_resource *surf; @@ -110,45 +111,45 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) surf = pfb->cbufs[i]->texture; - resource_written(ctx, surf); + resource_written(batch, surf); buffers |= PIPE_CLEAR_COLOR0 << i; if (surf->nr_samples > 1) - ctx->gmem_reason |= 
FD_GMEM_MSAA_ENABLED; + batch->gmem_reason |= FD_GMEM_MSAA_ENABLED; if (fd_blend_enabled(ctx, i)) - ctx->gmem_reason |= FD_GMEM_BLEND_ENABLED; + batch->gmem_reason |= FD_GMEM_BLEND_ENABLED; } /* Skip over buffer 0, that is sent along with the command stream */ for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - resource_read(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer); - resource_read(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer); + resource_read(batch, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer); + resource_read(batch, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer); } /* Mark VBOs as being read */ for (i = 0; i < ctx->vtx.vertexbuf.count; i++) { assert(!ctx->vtx.vertexbuf.vb[i].user_buffer); - resource_read(ctx, ctx->vtx.vertexbuf.vb[i].buffer); + resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer); } /* Mark index buffer as being read */ - resource_read(ctx, ctx->indexbuf.buffer); + resource_read(batch, ctx->indexbuf.buffer); /* Mark textures as being read */ for (i = 0; i < ctx->verttex.num_textures; i++) if (ctx->verttex.textures[i]) - resource_read(ctx, ctx->verttex.textures[i]->texture); + resource_read(batch, ctx->verttex.textures[i]->texture); for (i = 0; i < ctx->fragtex.num_textures; i++) if (ctx->fragtex.textures[i]) - resource_read(ctx, ctx->fragtex.textures[i]->texture); + resource_read(batch, ctx->fragtex.textures[i]->texture); /* Mark streamout buffers as being written.. 
*/ for (i = 0; i < ctx->streamout.num_targets; i++) if (ctx->streamout.targets[i]) - resource_written(ctx, ctx->streamout.targets[i]->buffer); + resource_written(batch, ctx->streamout.targets[i]->buffer); - ctx->num_draws++; + batch->num_draws++; prims = u_reduced_prims_for_vertices(info->mode, info->count); @@ -165,17 +166,17 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) ctx->stats.prims_generated += prims; /* any buffers that haven't been cleared yet, we need to restore: */ - ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared); + batch->restore |= buffers & (FD_BUFFER_ALL & ~batch->cleared); /* and any buffers used, need to be resolved: */ - ctx->resolve |= buffers; + batch->resolve |= buffers; - DBG("%x num_draws=%u (%s/%s)", buffers, ctx->num_draws, + DBG("%x num_draws=%u (%s/%s)", buffers, batch->num_draws, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); - fd_hw_query_set_stage(ctx, ctx->batch->draw, FD_STAGE_DRAW); + fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_DRAW); if (ctx->draw_vbo(ctx, info)) - ctx->needs_flush = true; + batch->needs_flush = true; for (i = 0; i < ctx->streamout.num_targets; i++) ctx->streamout.offsets[i] += info->count; @@ -183,7 +184,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) if (fd_mesa_debug & FD_DBG_DDRAW) ctx->dirty = 0xffffffff; - fd_batch_check_size(ctx->batch); + fd_batch_check_size(batch); } /* TODO figure out how to make better use of existing state mechanism @@ -197,7 +198,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct fd_context *ctx = fd_context(pctx); - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_batch *batch = ctx->batch; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); unsigned 
cleared_buffers; int i; @@ -213,38 +215,38 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, * something like alpha-test causes side effects from the draw in * the depth buffer, etc) */ - cleared_buffers = buffers & (FD_BUFFER_ALL & ~ctx->restore); + cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore); /* do we have full-screen scissor? */ if (!memcmp(scissor, &ctx->disabled_scissor, sizeof(*scissor))) { - ctx->cleared |= cleared_buffers; + batch->cleared |= cleared_buffers; } else { - ctx->partial_cleared |= cleared_buffers; + batch->partial_cleared |= cleared_buffers; if (cleared_buffers & PIPE_CLEAR_COLOR) - ctx->cleared_scissor.color = *scissor; + batch->cleared_scissor.color = *scissor; if (cleared_buffers & PIPE_CLEAR_DEPTH) - ctx->cleared_scissor.depth = *scissor; + batch->cleared_scissor.depth = *scissor; if (cleared_buffers & PIPE_CLEAR_STENCIL) - ctx->cleared_scissor.stencil = *scissor; + batch->cleared_scissor.stencil = *scissor; } - ctx->resolve |= buffers; - ctx->needs_flush = true; + batch->resolve |= buffers; + batch->needs_flush = true; if (buffers & PIPE_CLEAR_COLOR) for (i = 0; i < pfb->nr_cbufs; i++) if (buffers & (PIPE_CLEAR_COLOR0 << i)) - resource_written(ctx, pfb->cbufs[i]->texture); + resource_written(batch, pfb->cbufs[i]->texture); if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - resource_written(ctx, pfb->zsbuf->texture); - ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL; + resource_written(batch, pfb->zsbuf->texture); + batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL; } DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); - fd_hw_query_set_stage(ctx, ctx->batch->draw, FD_STAGE_CLEAR); + fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_CLEAR); ctx->clear(ctx, buffers, color, depth, stencil); diff --git a/src/gallium/drivers/freedreno/freedreno_draw.h 
b/src/gallium/drivers/freedreno/freedreno_draw.h index 7a970a2..60cd9c0 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.h +++ b/src/gallium/drivers/freedreno/freedreno_draw.h @@ -42,7 +42,7 @@ struct fd_ringbuffer; void fd_draw_init(struct pipe_context *pctx); static inline void -fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, +fd_draw(struct fd_batch *batch, struct fd_ringbuffer *ring, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, enum pc_di_src_sel src_sel, uint32_t count, @@ -59,7 +59,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, */ emit_marker(ring, 7); - if (is_a3xx_p0(ctx->screen)) { + if (is_a3xx_p0(batch->ctx->screen)) { /* dummy-draw workaround: */ OUT_PKT3(ring, CP_DRAW_INDX, 3); OUT_RING(ring, 0x00000000); @@ -81,7 +81,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, * we know if we are binning or not */ OUT_RINGP(ring, DRAW(primtype, src_sel, idx_type, 0, instances), - &ctx->draw_patches); + &batch->draw_patches); } else { OUT_RING(ring, DRAW(primtype, src_sel, idx_type, vismode, instances)); } @@ -93,7 +93,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, emit_marker(ring, 7); - fd_reset_wfi(ctx); + fd_reset_wfi(batch->ctx); } @@ -112,18 +112,19 @@ size2indextype(unsigned index_size) /* this is same for a2xx/a3xx, so split into helper: */ static inline void -fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, +fd_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, const struct pipe_draw_info *info) { - struct pipe_index_buffer *idx = &ctx->indexbuf; struct pipe_resource *idx_buffer = NULL; enum pc_di_index_size idx_type = INDEX_SIZE_IGN; enum pc_di_src_sel src_sel; uint32_t idx_size, idx_offset; if (info->indexed) { + struct pipe_index_buffer *idx = &batch->ctx->indexbuf; + assert(!idx->user_buffer); idx_buffer = idx->buffer; @@ -139,7 +140,7 @@ fd_draw_emit(struct fd_context 
*ctx, struct fd_ringbuffer *ring, src_sel = DI_SRC_SEL_AUTO_INDEX; } - fd_draw(ctx, ring, primtype, vismode, src_sel, + fd_draw(batch, ring, primtype, vismode, src_sel, info->count, info->instance_count - 1, idx_type, idx_size, idx_offset, idx_buffer); } diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 54a3247..9ca7f5f 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -67,11 +67,11 @@ * resolve. */ -static uint32_t bin_width(struct fd_context *ctx) +static uint32_t bin_width(struct fd_screen *screen) { - if (is_a4xx(ctx->screen)) + if (is_a4xx(screen)) return 1024; - if (is_a3xx(ctx->screen)) + if (is_a3xx(screen)) return 992; return 512; } @@ -103,20 +103,21 @@ total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2], } static void -calculate_tiles(struct fd_context *ctx) +calculate_tiles(struct fd_batch *batch) { + struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_scissor_state *scissor = &ctx->max_scissor; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_scissor_state *scissor = &batch->max_scissor; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; uint32_t gmem_size = ctx->screen->gmemsize_bytes; uint32_t minx, miny, width, height; uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; - uint32_t max_width = bin_width(ctx); + uint32_t max_width = bin_width(ctx->screen); uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0}; uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; - bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); + bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); int tile_n[ARRAY_SIZE(ctx->pipe)]; if (has_zs) { @@ -302,14 +303,15 @@ calculate_tiles(struct fd_context *ctx) } static void -render_tiles(struct fd_context *ctx) +render_tiles(struct fd_batch *batch) { + struct fd_context *ctx = 
batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; int i; - ctx->emit_tile_init(ctx); + ctx->emit_tile_init(batch); - if (ctx->restore) + if (batch->restore) ctx->stats.batch_restore++; for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) { @@ -318,52 +320,54 @@ render_tiles(struct fd_context *ctx) DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", tile->bin_h, tile->yoff, tile->bin_w, tile->xoff); - ctx->emit_tile_prep(ctx, tile); + ctx->emit_tile_prep(batch, tile); - if (ctx->restore) { - fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM); - ctx->emit_tile_mem2gmem(ctx, tile); - fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); + if (batch->restore) { + fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_MEM2GMEM); + ctx->emit_tile_mem2gmem(batch, tile); + fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_NULL); } - ctx->emit_tile_renderprep(ctx, tile); + ctx->emit_tile_renderprep(batch, tile); - fd_hw_query_prepare_tile(ctx, i, ctx->ring); + fd_hw_query_prepare_tile(ctx, i, batch->gmem); /* emit IB to drawcmds: */ - ctx->emit_ib(ctx->ring, ctx->batch->draw); + ctx->emit_ib(batch->gmem, batch->draw); fd_reset_wfi(ctx); /* emit gmem2mem to transfer tile back to system memory: */ - fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM); - ctx->emit_tile_gmem2mem(ctx, tile); - fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); + fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_GMEM2MEM); + ctx->emit_tile_gmem2mem(batch, tile); + fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_NULL); } } static void -render_sysmem(struct fd_context *ctx) +render_sysmem(struct fd_batch *batch) { - ctx->emit_sysmem_prep(ctx); + struct fd_context *ctx = batch->ctx; - fd_hw_query_prepare_tile(ctx, 0, ctx->ring); + ctx->emit_sysmem_prep(batch); + + fd_hw_query_prepare_tile(ctx, 0, batch->gmem); /* emit IB to drawcmds: */ - ctx->emit_ib(ctx->ring, ctx->batch->draw); + ctx->emit_ib(batch->gmem, batch->draw); fd_reset_wfi(ctx); } void -fd_gmem_render_tiles(struct fd_context *ctx) 
+fd_gmem_render_tiles(struct fd_batch *batch) { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_batch *batch = ctx->batch; + struct fd_context *ctx = batch->ctx; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; bool sysmem = false; if (ctx->emit_sysmem_prep) { - if (ctx->cleared || ctx->gmem_reason || (ctx->num_draws > 5)) { + if (batch->cleared || batch->gmem_reason || (batch->num_draws > 5)) { DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u", - ctx->cleared, ctx->gmem_reason, ctx->num_draws); + batch->cleared, batch->gmem_reason, batch->num_draws); } else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) { sysmem = true; } @@ -378,36 +382,28 @@ fd_gmem_render_tiles(struct fd_context *ctx) ctx->stats.batch_total++; - ctx->ring = batch->gmem; - if (sysmem) { DBG("rendering sysmem (%s/%s)", util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); fd_hw_query_prepare(ctx, 1); - render_sysmem(ctx); + render_sysmem(batch); ctx->stats.batch_sysmem++; } else { struct fd_gmem_stateobj *gmem = &ctx->gmem; - calculate_tiles(ctx); + calculate_tiles(batch); DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y); - render_tiles(ctx); + render_tiles(batch); ctx->stats.batch_gmem++; } fd_ringbuffer_flush(batch->gmem); - ctx->ring = NULL; - fd_reset_wfi(ctx); - /* reset maximal bounds: */ - ctx->max_scissor.minx = ctx->max_scissor.miny = ~0; - ctx->max_scissor.maxx = ctx->max_scissor.maxy = 0; - ctx->dirty = ~0; } @@ -431,26 +427,26 @@ skip_restore(struct pipe_scissor_state *scissor, struct fd_tile *tile) * case would be a single clear. 
*/ bool -fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile, +fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile, uint32_t buffers) { - if (!(ctx->restore & buffers)) + if (!(batch->restore & buffers)) return false; /* if buffers partially cleared, then slow-path to figure out * if this particular tile needs restoring: */ if ((buffers & FD_BUFFER_COLOR) && - (ctx->partial_cleared & FD_BUFFER_COLOR) && - skip_restore(&ctx->cleared_scissor.color, tile)) + (batch->partial_cleared & FD_BUFFER_COLOR) && + skip_restore(&batch->cleared_scissor.color, tile)) return false; if ((buffers & FD_BUFFER_DEPTH) && - (ctx->partial_cleared & FD_BUFFER_DEPTH) && - skip_restore(&ctx->cleared_scissor.depth, tile)) + (batch->partial_cleared & FD_BUFFER_DEPTH) && + skip_restore(&batch->cleared_scissor.depth, tile)) return false; if ((buffers & FD_BUFFER_STENCIL) && - (ctx->partial_cleared & FD_BUFFER_STENCIL) && - skip_restore(&ctx->cleared_scissor.stencil, tile)) + (batch->partial_cleared & FD_BUFFER_STENCIL) && + skip_restore(&batch->cleared_scissor.stencil, tile)) return false; return true; diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h index 38b557e..116423a 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.h +++ b/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -59,11 +59,11 @@ struct fd_gmem_stateobj { uint16_t width, height; }; -struct fd_context; +struct fd_batch; -void fd_gmem_render_tiles(struct fd_context *ctx); +void fd_gmem_render_tiles(struct fd_batch *batch); -bool fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile, +bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile, uint32_t buffers); #endif /* FREEDRENO_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c index 817f129..76d90d6 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ 
b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -71,7 +71,7 @@ get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring, if (!ctx->sample_cache[idx]) { ctx->sample_cache[idx] = ctx->sample_providers[idx]->get_sample(ctx, ring); - ctx->needs_flush = true; + ctx->batch->needs_flush = true; } fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]); @@ -213,7 +213,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, /* if app didn't actually trigger any cmdstream, then * we have nothing to do: */ - if (!ctx->needs_flush) + if (!ctx->batch->needs_flush) return true; DBG("reading query result forces flush!"); fd_context_render(&ctx->base); diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index eea53c6..20d68fe 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -821,7 +821,8 @@ fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond) util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa); util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref); util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask); - util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer); + util_blitter_save_framebuffer(ctx->blitter, + ctx->batch ? 
&ctx->batch->framebuffer : NULL); util_blitter_save_fragment_sampler_states(ctx->blitter, ctx->fragtex.num_samplers, (void **)ctx->fragtex.samplers); diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 252d153..98b56c7 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -115,13 +115,15 @@ fd_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *framebuffer) { struct fd_context *ctx = fd_context(pctx); - struct pipe_framebuffer_state *cso = &ctx->framebuffer; + struct pipe_framebuffer_state *cso; - DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush, + DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush, framebuffer->cbufs[0], framebuffer->zsbuf); fd_context_render(pctx); + cso = &ctx->batch->framebuffer; + if ((cso->width != framebuffer->width) || (cso->height != framebuffer->height)) ctx->needs_rb_fbd = true; -- 2.7.4