From 9688166bd9c3e12c74c55b857ad0dbb62b28da9e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 6 Apr 2015 13:17:58 -0700 Subject: [PATCH] vc4: Move the render job state into a separate structure. This is a preparation step for having multiple jobs being queued up at the same time. --- src/gallium/drivers/vc4/vc4_blit.c | 41 +++---- src/gallium/drivers/vc4/vc4_cl.c | 18 ++-- src/gallium/drivers/vc4/vc4_cl.h | 13 +-- src/gallium/drivers/vc4/vc4_context.c | 43 +++++--- src/gallium/drivers/vc4/vc4_context.h | 51 +++++---- src/gallium/drivers/vc4/vc4_draw.c | 107 ++++++++++--------- src/gallium/drivers/vc4/vc4_emit.c | 19 ++-- src/gallium/drivers/vc4/vc4_job.c | 182 +++++++++++++++----------------- src/gallium/drivers/vc4/vc4_program.c | 3 +- src/gallium/drivers/vc4/vc4_simulator.c | 3 +- src/gallium/drivers/vc4/vc4_state.c | 25 ++--- src/gallium/drivers/vc4/vc4_uniforms.c | 37 +++---- 12 files changed, 287 insertions(+), 255 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index ee08ab8..83e1e00 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -51,9 +51,10 @@ static bool vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) { struct vc4_context *vc4 = vc4_context(pctx); - bool old_msaa = vc4->msaa; - int old_tile_width = vc4->tile_width; - int old_tile_height = vc4->tile_height; + struct vc4_job *job = vc4->job; + bool old_msaa = job->msaa; + int old_tile_width = job->tile_width; + int old_tile_height = job->tile_height; bool msaa = (info->src.resource->nr_samples > 1 || info->dst.resource->nr_samples > 1); int tile_width = msaa ? 32 : 64; @@ -131,29 +132,29 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) struct pipe_surface *src_surf = vc4_get_blit_surface(pctx, info->src.resource, info->src.level); - pipe_surface_reference(&vc4->color_read, src_surf); + pipe_surface_reference(&job->color_read, src_surf); if (dst_surf->texture->nr_samples > 1) - pipe_surface_reference(&vc4->color_write, dst_surf); + pipe_surface_reference(&job->color_write, dst_surf); else - pipe_surface_reference(&vc4->msaa_color_write, dst_surf); + pipe_surface_reference(&job->msaa_color_write, dst_surf); - vc4->draw_min_x = info->dst.box.x; - vc4->draw_min_y = info->dst.box.y; - vc4->draw_max_x = info->dst.box.x + info->dst.box.width; - vc4->draw_max_y = info->dst.box.y + info->dst.box.height; - vc4->draw_width = dst_surf->width; - vc4->draw_height = dst_surf->height; + job->draw_min_x = info->dst.box.x; + job->draw_min_y = info->dst.box.y; + job->draw_max_x = info->dst.box.x + info->dst.box.width; + job->draw_max_y = info->dst.box.y + info->dst.box.height; + job->draw_width = dst_surf->width; + job->draw_height = dst_surf->height; - vc4->tile_width = tile_width; - vc4->tile_height = tile_height; - vc4->msaa = msaa; - vc4->needs_flush = true; + job->tile_width = tile_width; + job->tile_height = tile_height; + job->msaa = msaa; + job->needs_flush = true; - vc4_job_submit(vc4); + vc4_job_submit(vc4, job); - vc4->msaa = old_msaa; - vc4->tile_width = old_tile_width; - vc4->tile_height = old_tile_height; + job->msaa = old_msaa; + job->tile_width = old_tile_width; + job->tile_height = old_tile_height; pipe_surface_reference(&dst_surf, NULL); pipe_surface_reference(&src_surf, NULL); diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c index ced4f2d..afb9987 100644 --- a/src/gallium/drivers/vc4/vc4_cl.c +++ b/src/gallium/drivers/vc4/vc4_cl.c @@ -26,9 +26,9 @@ #include "vc4_context.h" void -vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl) +vc4_init_cl(void *mem_ctx, struct vc4_cl *cl) { - cl->base = ralloc_size(vc4, 1); + cl->base = ralloc_size(mem_ctx, 1); cl->next = cl->base; cl->size = 0; } @@ -56,25 +56,25 @@ vc4_reset_cl(struct vc4_cl *cl) } uint32_t -vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo) +vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo) { uint32_t hindex; - uint32_t *current_handles = vc4->bo_handles.base; + uint32_t *current_handles = job->bo_handles.base; - for (hindex = 0; hindex < cl_offset(&vc4->bo_handles) / 4; hindex++) { + for (hindex = 0; hindex < cl_offset(&job->bo_handles) / 4; hindex++) { if (current_handles[hindex] == bo->handle) return hindex; } struct vc4_cl_out *out; - out = cl_start(&vc4->bo_handles); + out = cl_start(&job->bo_handles); cl_u32(&out, bo->handle); - cl_end(&vc4->bo_handles, out); + cl_end(&job->bo_handles, out); - out = cl_start(&vc4->bo_pointers); + out = cl_start(&job->bo_pointers); cl_ptr(&out, vc4_bo_reference(bo)); - cl_end(&vc4->bo_pointers, out); + cl_end(&job->bo_pointers, out); return hindex; } diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h index bf4be0e..74bf8cf 100644 --- a/src/gallium/drivers/vc4/vc4_cl.h +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -32,6 +32,7 @@ #include "kernel/vc4_packet.h" struct vc4_bo; +struct vc4_job; /** * Undefined structure, used for typechecking that you're passing the pointers @@ -49,10 +50,10 @@ struct vc4_cl { #endif }; -void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl); +void vc4_init_cl(void *mem_ctx, struct vc4_cl *cl); void vc4_reset_cl(struct vc4_cl *cl); void vc4_dump_cl(void *cl, uint32_t size, bool is_render); -uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo); +uint32_t vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo); struct PACKED unaligned_16 { uint16_t x; }; struct PACKED unaligned_32 { uint32_t x; }; @@ -174,10 +175,10 @@ cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n) } static inline void -cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out, +cl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out, struct vc4_bo *bo, uint32_t offset) { - *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo); + *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo); cl_advance(&cl->reloc_next, 4); #ifdef DEBUG @@ -188,11 +189,11 @@ cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out, } static inline void -cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl, +cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out, struct vc4_bo *bo, uint32_t offset) { - *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo); + *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo); cl_advance(&cl->reloc_next, 4); #ifdef DEBUG diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 6f50b97..1a212e4 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -43,34 +43,41 @@ vc4_flush(struct pipe_context *pctx) struct vc4_context *vc4 = vc4_context(pctx); struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; + struct vc4_job *job = vc4->job; - if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) { + if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) { if (cbuf->texture->nr_samples > 1) { - pipe_surface_reference(&vc4->msaa_color_write, cbuf); + pipe_surface_reference(&job->msaa_color_write, cbuf); } else { - pipe_surface_reference(&vc4->color_write, cbuf); + pipe_surface_reference(&job->color_write, cbuf); } - if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) { - pipe_surface_reference(&vc4->color_read, cbuf); + if (!(job->cleared & PIPE_CLEAR_COLOR0)) { + pipe_surface_reference(&job->color_read, cbuf); } } - if (vc4->framebuffer.zsbuf && - (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + if (zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { if (zsbuf->texture->nr_samples > 1) { - pipe_surface_reference(&vc4->msaa_zs_write, zsbuf); + pipe_surface_reference(&job->msaa_zs_write, zsbuf); } else { - pipe_surface_reference(&vc4->zs_write, zsbuf); + pipe_surface_reference(&job->zs_write, zsbuf); } - if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { - pipe_surface_reference(&vc4->zs_read, zsbuf); + if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + pipe_surface_reference(&job->zs_read, zsbuf); } } - vc4_job_submit(vc4); + vc4_job_submit(vc4, job); + + /* We have no hardware context saved between our draw calls, so we + * need to flag the next draw as needing all state emitted. Emitting + * all state at the start of our draws is also what ensures that we + * return to the state we need after a previous tile has finished. + */ + vc4->dirty = ~0; } static void @@ -100,16 +107,17 @@ vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo, bool include_reads) { struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_job *job = vc4->job; - if (!vc4->needs_flush) + if (!job->needs_flush) return false; /* Walk all the referenced BOs in the drawing command list to see if * they match. */ if (include_reads) { - struct vc4_bo **referenced_bos = vc4->bo_pointers.base; - for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) { + struct vc4_bo **referenced_bos = job->bo_pointers.base; + for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { if (referenced_bos[i] == bo) { return true; } @@ -146,7 +154,7 @@ vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) struct pipe_surface *zsurf = vc4->framebuffer.zsbuf; if (zsurf && zsurf->texture == prsc) - vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); + vc4->job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); } static void @@ -202,7 +210,8 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) vc4_query_init(pctx); vc4_resource_context_init(pctx); - vc4_job_init(vc4); + vc4->job = rzalloc(vc4, struct vc4_job); + vc4_job_init(vc4->job); vc4->fd = screen->fd; diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index d02a971..38dc3a5 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -190,12 +190,16 @@ struct vc4_vertex_stateobj { unsigned num_elements; }; -struct vc4_context { - struct pipe_context base; - - int fd; - struct vc4_screen *screen; - +/** + * A complete bin/render job. + * + * This is all of the state necessary to submit a bin/render to the kernel. + * We want to be able to have multiple in progress at a time, so that we don't + * need to flush an existing CL just to switch to rendering to a new render + * target (which would mean reading back from the old render target when + * starting to render to it again). + */ +struct vc4_job { struct vc4_cl bcl; struct vc4_cl shader_rec; struct vc4_cl uniforms; @@ -238,11 +242,6 @@ struct vc4_context { bool msaa; /** @} */ - struct slab_mempool transfer_pool; - struct blitter_context *blitter; - - /** bitfield of VC4_DIRTY_* */ - uint32_t dirty; /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the * first rendering. */ @@ -267,11 +266,22 @@ struct vc4_context { * the current job. */ uint32_t draw_calls_queued; +}; - /** Maximum index buffer valid for the current shader_rec. */ - uint32_t max_index; - /** Last index bias baked into the current shader_rec. */ - uint32_t last_index_bias; +struct vc4_context { + struct pipe_context base; + + int fd; + struct vc4_screen *screen; + + /** The render job for the currently bound FBO. */ + struct vc4_job *job; + + struct slab_mempool transfer_pool; + struct blitter_context *blitter; + + /** bitfield of VC4_DIRTY_* */ + uint32_t dirty; struct primconvert_context *primconvert; @@ -289,6 +299,11 @@ struct vc4_context { uint8_t prim_mode; + /** Maximum index buffer valid for the current shader_rec. */ + uint32_t max_index; + /** Last index bias baked into the current shader_rec. */ + uint32_t last_index_bias; + /** Seqno of the last CL flush's job. */ uint64_t last_emit_seqno; @@ -398,9 +413,9 @@ void vc4_write_uniforms(struct vc4_context *vc4, struct vc4_texture_stateobj *texstate); void vc4_flush(struct pipe_context *pctx); -void vc4_job_init(struct vc4_context *vc4); -void vc4_job_submit(struct vc4_context *vc4); -void vc4_job_reset(struct vc4_context *vc4); +void vc4_job_init(struct vc4_job *job); +void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job); +void vc4_job_reset(struct vc4_job *job); bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo, bool include_reads); void vc4_emit_state(struct pipe_context *pctx); diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 9770abf..52a53db 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -33,7 +33,7 @@ #include "vc4_resource.h" static void -vc4_get_draw_cl_space(struct vc4_context *vc4, int vert_count) +vc4_get_draw_cl_space(struct vc4_job *job, int vert_count) { /* The SW-5891 workaround may cause us to emit multiple shader recs * and draw packets. @@ -43,7 +43,7 @@ vc4_get_draw_cl_space(struct vc4_context *vc4, int vert_count) /* Binner gets our packet state -- vc4_emit.c contents, * and the primitive itself. */ - cl_ensure_space(&vc4->bcl, + cl_ensure_space(&job->bcl, 256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE + VC4_PACKET_GL_SHADER_STATE_SIZE) * num_draws); @@ -53,7 +53,7 @@ vc4_get_draw_cl_space(struct vc4_context *vc4, int vert_count) * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of * vattr stride). */ - cl_ensure_space(&vc4->shader_rec, + cl_ensure_space(&job->shader_rec, (12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws); /* Uniforms are covered by vc4_write_uniforms(). */ @@ -61,8 +61,8 @@ vc4_get_draw_cl_space(struct vc4_context *vc4, int vert_count) /* There could be up to 16 textures per stage, plus misc other * pointers. */ - cl_ensure_space(&vc4->bo_handles, (2 * 16 + 20) * sizeof(uint32_t)); - cl_ensure_space(&vc4->bo_pointers, + cl_ensure_space(&job->bo_handles, (2 * 16 + 20) * sizeof(uint32_t)); + cl_ensure_space(&job->bo_pointers, (2 * 16 + 20) * sizeof(struct vc4_bo *)); } @@ -72,22 +72,24 @@ vc4_get_draw_cl_space(struct vc4_context *vc4, int vert_count) static void vc4_start_draw(struct vc4_context *vc4, int vert_count) { - if (vc4->needs_flush) + struct vc4_job *job = vc4->job; + + if (job->needs_flush) return; - vc4_get_draw_cl_space(vc4, 0); + vc4_get_draw_cl_space(job, 0); - struct vc4_cl_out *bcl = cl_start(&vc4->bcl); + struct vc4_cl_out *bcl = cl_start(&job->bcl); // Tile state data is 48 bytes per tile, I think it can be thrown away // as soon as binning is finished. cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG); cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */ cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */ cl_u32(&bcl, 0); /* tile state addr, filled by kernel */ - cl_u8(&bcl, vc4->draw_tiles_x); - cl_u8(&bcl, vc4->draw_tiles_y); + cl_u8(&bcl, job->draw_tiles_x); + cl_u8(&bcl, job->draw_tiles_y); /* Other flags are filled by kernel. */ - cl_u8(&bcl, vc4->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0); + cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0); /* START_TILE_BINNING resets the statechange counters in the hardware, * which are what is used when a primitive is binned to a tile to @@ -105,12 +107,12 @@ vc4_start_draw(struct vc4_context *vc4, int vert_count) cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX | VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES)); - vc4->needs_flush = true; - vc4->draw_calls_queued++; - vc4->draw_width = vc4->framebuffer.width; - vc4->draw_height = vc4->framebuffer.height; + job->needs_flush = true; + job->draw_calls_queued++; + job->draw_width = vc4->framebuffer.width; + job->draw_height = vc4->framebuffer.height; - cl_end(&vc4->bcl, bcl); + cl_end(&job->bcl, bcl); } static void @@ -128,9 +130,11 @@ vc4_update_shadow_textures(struct pipe_context *pctx, } static void -vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info, +vc4_emit_gl_shader_state(struct vc4_context *vc4, + const struct pipe_draw_info *info, uint32_t extra_index_bias) { + struct vc4_job *job = vc4->job; /* VC4_DIRTY_VTXSTATE */ struct vc4_vertex_stateobj *vtx = vc4->vtx; /* VC4_DIRTY_VTXBUF */ @@ -142,7 +146,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i uint32_t num_elements_emit = MAX2(vtx->num_elements, 1); /* Emit the shader record. */ struct vc4_cl_out *shader_rec = - cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit); + cl_start_shader_reloc(&job->shader_rec, 3 + num_elements_emit); /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */ cl_u16(&shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING | @@ -154,21 +158,21 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i /* VC4_DIRTY_COMPILED_FS */ cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */ cl_u8(&shader_rec, vc4->prog.fs->num_inputs); - cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0); + cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.fs->bo, 0); cl_u32(&shader_rec, 0); /* UBO offset written by kernel */ /* VC4_DIRTY_COMPILED_VS */ cl_u16(&shader_rec, 0); /* vs num uniforms */ cl_u8(&shader_rec, vc4->prog.vs->vattrs_live); cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]); - cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0); + cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.vs->bo, 0); cl_u32(&shader_rec, 0); /* UBO offset written by kernel */ /* VC4_DIRTY_COMPILED_CS */ cl_u16(&shader_rec, 0); /* cs num uniforms */ cl_u8(&shader_rec, vc4->prog.cs->vattrs_live); cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]); - cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0); + cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.cs->bo, 0); cl_u32(&shader_rec, 0); /* UBO offset written by kernel */ uint32_t max_index = 0xffff; @@ -186,7 +190,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i uint32_t elem_size = util_format_get_blocksize(elem->src_format); - cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset); + cl_reloc(job, &job->shader_rec, &shader_rec, rsc->bo, offset); cl_u8(&shader_rec, elem_size - 1); cl_u8(&shader_rec, vb->stride); cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]); @@ -201,16 +205,16 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i if (vtx->num_elements == 0) { assert(num_elements_emit == 1); struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO"); - cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0); + cl_reloc(job, &job->shader_rec, &shader_rec, bo, 0); cl_u8(&shader_rec, 16 - 1); /* element size */ cl_u8(&shader_rec, 0); /* stride */ cl_u8(&shader_rec, 0); /* VS VPM offset */ cl_u8(&shader_rec, 0); /* CS VPM offset */ vc4_bo_unreference(&bo); } - cl_end(&vc4->shader_rec, shader_rec); + cl_end(&job->shader_rec, shader_rec); - struct vc4_cl_out *bcl = cl_start(&vc4->bcl); + struct vc4_cl_out *bcl = cl_start(&job->bcl); /* the actual draw call. */ cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE); assert(vtx->num_elements <= 8); @@ -218,7 +222,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i * attributes. This field also contains the offset into shader_rec. */ cl_u32(&bcl, num_elements_emit & 0x7); - cl_end(&vc4->bcl, bcl); + cl_end(&job->bcl, bcl); vc4_write_uniforms(vc4, vc4->prog.fs, &vc4->constbuf[PIPE_SHADER_FRAGMENT], @@ -232,7 +236,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i vc4->last_index_bias = info->index_bias + extra_index_bias; vc4->max_index = max_index; - vc4->shader_rec_count++; + job->shader_rec_count++; } /** @@ -259,8 +263,9 @@ static void vc4_hw_2116_workaround(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_job *job = vc4->job; - if (vc4->draw_calls_queued == 0x1ef0) { + if (job->draw_calls_queued == 0x1ef0) { perf_debug("Flushing batch due to HW-2116 workaround " "(too many draw calls per scene\n"); vc4_flush(pctx); @@ -271,6 +276,7 @@ static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_job *job = vc4->job; if (info->mode >= PIPE_PRIM_QUADS) { util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); @@ -287,7 +293,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) vc4_hw_2116_workaround(pctx); - vc4_get_draw_cl_space(vc4, info->count); + vc4_get_draw_cl_space(job, info->count); if (vc4->prim_mode != info->mode) { vc4->prim_mode = info->mode; @@ -297,7 +303,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) vc4_start_draw(vc4, info->count); vc4_update_compiled_shaders(vc4, info->mode); - uint32_t start_draw_calls_queued = vc4->draw_calls_queued; + uint32_t start_draw_calls_queued = job->draw_calls_queued; vc4_emit_state(pctx); if ((vc4->dirty & (VC4_DIRTY_VTXBUF | @@ -319,7 +325,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. */ - struct vc4_cl_out *bcl = cl_start(&vc4->bcl); + struct vc4_cl_out *bcl = cl_start(&job->bcl); if (info->indexed) { uint32_t offset = vc4->indexbuf.offset; uint32_t index_size = vc4->indexbuf.index_size; @@ -341,7 +347,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } struct vc4_resource *rsc = vc4_resource(prsc); - cl_start_reloc(&vc4->bcl, &bcl, 1); + cl_start_reloc(&job->bcl, &bcl, 1); cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); cl_u8(&bcl, info->mode | @@ -349,7 +355,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) VC4_INDEX_BUFFER_U16: VC4_INDEX_BUFFER_U8)); cl_u32(&bcl, info->count); - cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset); + cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset); cl_u32(&bcl, vc4->max_index); if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer) @@ -376,10 +382,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) * plus whatever remainder. */ if (extra_index_bias) { - cl_end(&vc4->bcl, bcl); + cl_end(&job->bcl, bcl); vc4_emit_gl_shader_state(vc4, info, extra_index_bias); - bcl = cl_start(&vc4->bcl); + bcl = cl_start(&job->bcl); } if (start + count > max_verts) { @@ -425,20 +431,20 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) start = 0; } } - cl_end(&vc4->bcl, bcl); + cl_end(&job->bcl, bcl); /* No flushes of the job should have happened between when we started * emitting state for our draw and when we just emitted our draw's * primitives. */ - assert(start_draw_calls_queued == vc4->draw_calls_queued); + assert(start_draw_calls_queued == job->draw_calls_queued); if (vc4->zsa && vc4->zsa->base.depth.enabled) { - vc4->resolve |= PIPE_CLEAR_DEPTH; + job->resolve |= PIPE_CLEAR_DEPTH; } if (vc4->zsa && vc4->zsa->base.stencil[0].enabled) - vc4->resolve |= PIPE_CLEAR_STENCIL; - vc4->resolve |= PIPE_CLEAR_COLOR0; + job->resolve |= PIPE_CLEAR_STENCIL; + job->resolve |= PIPE_CLEAR_COLOR0; if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH) vc4_flush(pctx); @@ -460,11 +466,12 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_job *job = vc4->job; /* We can't flag new buffers for clearing once we've queued draws. We * could avoid this by using the 3d engine to clear. */ - if (vc4->draw_calls_queued) { + if (job->draw_calls_queued) { perf_debug("Flushing rendering to process new clear.\n"); vc4_flush(pctx); } @@ -488,7 +495,7 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, } if (buffers & PIPE_CLEAR_COLOR0) { - vc4->clear_color[0] = vc4->clear_color[1] = + job->clear_color[0] = job->clear_color[1] = pack_rgba(vc4->framebuffer.cbufs[0]->format, color->f); } @@ -497,16 +504,16 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, /* Though the depth buffer is stored with Z in the high 24, * for this field we just need to store it in the low 24. */ - vc4->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth); - vc4->clear_stencil = stencil; + job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth); + job->clear_stencil = stencil; } - vc4->draw_min_x = 0; - vc4->draw_min_y = 0; - vc4->draw_max_x = vc4->framebuffer.width; - vc4->draw_max_y = vc4->framebuffer.height; - vc4->cleared |= buffers; - vc4->resolve |= buffers; + job->draw_min_x = 0; + job->draw_min_y = 0; + job->draw_max_x = vc4->framebuffer.width; + job->draw_max_y = vc4->framebuffer.height; + job->cleared |= buffers; + job->resolve |= buffers; vc4_start_draw(vc4, 0); } diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c index 8b192da..9258cee 100644 --- a/src/gallium/drivers/vc4/vc4_emit.c +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -27,8 +27,9 @@ void vc4_emit_state(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_job *job = vc4->job; - struct vc4_cl_out *bcl = cl_start(&vc4->bcl); + struct vc4_cl_out *bcl = cl_start(&job->bcl); if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT | VC4_DIRTY_RASTERIZER)) { float *vpscale = vc4->viewport.scale; @@ -50,8 +51,8 @@ vc4_emit_state(struct pipe_context *pctx) if (!vc4->rasterizer->base.scissor) { minx = MAX2(vp_minx, 0); miny = MAX2(vp_miny, 0); - maxx = MIN2(vp_maxx, vc4->draw_width); - maxy = MIN2(vp_maxy, vc4->draw_height); + maxx = MIN2(vp_maxx, job->draw_width); + maxy = MIN2(vp_maxy, job->draw_height); } else { minx = MAX2(vp_minx, vc4->scissor.minx); miny = MAX2(vp_miny, vc4->scissor.miny); @@ -65,10 +66,10 @@ vc4_emit_state(struct pipe_context *pctx) cl_u16(&bcl, maxx - minx); cl_u16(&bcl, maxy - miny); - vc4->draw_min_x = MIN2(vc4->draw_min_x, minx); - vc4->draw_min_y = MIN2(vc4->draw_min_y, miny); - vc4->draw_max_x = MAX2(vc4->draw_max_x, maxx); - vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy); + job->draw_min_x = MIN2(job->draw_min_x, minx); + job->draw_min_y = MIN2(job->draw_min_y, miny); + job->draw_max_x = MAX2(job->draw_max_x, maxx); + job->draw_max_y = MAX2(job->draw_max_y, maxy); } if (vc4->dirty & (VC4_DIRTY_RASTERIZER | @@ -85,7 +86,7 @@ vc4_emit_state(struct pipe_context *pctx) * was seeing bad rendering on glxgears -samples 4 even in * that case. */ - if (vc4->msaa || vc4->prog.fs->disable_early_z) + if (job->msaa || vc4->prog.fs->disable_early_z) ez_enable_mask_out &= ~VC4_CONFIG_BITS_EARLY_Z; cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS); @@ -132,5 +133,5 @@ vc4_emit_state(struct pipe_context *pctx) vc4->prog.fs->color_inputs : 0); } - cl_end(&vc4->bcl, bcl); + cl_end(&job->bcl, bcl); } diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index a957689..0ed2d3c 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -30,57 +30,51 @@ #include "vc4_context.h" void -vc4_job_init(struct vc4_context *vc4) +vc4_job_init(struct vc4_job *job) { - vc4_init_cl(vc4, &vc4->bcl); - vc4_init_cl(vc4, &vc4->shader_rec); - vc4_init_cl(vc4, &vc4->uniforms); - vc4_init_cl(vc4, &vc4->bo_handles); - vc4_init_cl(vc4, &vc4->bo_pointers); - vc4_job_reset(vc4); + vc4_init_cl(job, &job->bcl); + vc4_init_cl(job, &job->shader_rec); + vc4_init_cl(job, &job->uniforms); + vc4_init_cl(job, &job->bo_handles); + vc4_init_cl(job, &job->bo_pointers); + vc4_job_reset(job); } void -vc4_job_reset(struct vc4_context *vc4) +vc4_job_reset(struct vc4_job *job) { - struct vc4_bo **referenced_bos = vc4->bo_pointers.base; - for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) { + struct vc4_bo **referenced_bos = job->bo_pointers.base; + for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { vc4_bo_unreference(&referenced_bos[i]); } - vc4_reset_cl(&vc4->bcl); - vc4_reset_cl(&vc4->shader_rec); - vc4_reset_cl(&vc4->uniforms); - vc4_reset_cl(&vc4->bo_handles); - vc4_reset_cl(&vc4->bo_pointers); - vc4->shader_rec_count = 0; - - vc4->needs_flush = false; - vc4->draw_calls_queued = 0; - - /* We have no hardware context saved between our draw calls, so we - * need to flag the next draw as needing all state emitted. Emitting - * all state at the start of our draws is also what ensures that we - * return to the state we need after a previous tile has finished. - */ - vc4->dirty = ~0; - vc4->resolve = 0; - vc4->cleared = 0; - - vc4->draw_min_x = ~0; - vc4->draw_min_y = ~0; - vc4->draw_max_x = 0; - vc4->draw_max_y = 0; - - pipe_surface_reference(&vc4->color_write, NULL); - pipe_surface_reference(&vc4->color_read, NULL); - pipe_surface_reference(&vc4->msaa_color_write, NULL); - pipe_surface_reference(&vc4->zs_write, NULL); - pipe_surface_reference(&vc4->zs_read, NULL); - pipe_surface_reference(&vc4->msaa_zs_write, NULL); + vc4_reset_cl(&job->bcl); + vc4_reset_cl(&job->shader_rec); + vc4_reset_cl(&job->uniforms); + vc4_reset_cl(&job->bo_handles); + vc4_reset_cl(&job->bo_pointers); + job->shader_rec_count = 0; + + job->needs_flush = false; + job->draw_calls_queued = 0; + + job->resolve = 0; + job->cleared = 0; + + job->draw_min_x = ~0; + job->draw_min_y = ~0; + job->draw_max_x = 0; + job->draw_max_y = 0; + + pipe_surface_reference(&job->color_write, NULL); + pipe_surface_reference(&job->color_read, NULL); + pipe_surface_reference(&job->msaa_color_write, NULL); + pipe_surface_reference(&job->zs_write, NULL); + pipe_surface_reference(&job->zs_read, NULL); + pipe_surface_reference(&job->msaa_zs_write, NULL); } static void -vc4_submit_setup_rcl_surface(struct vc4_context *vc4, +vc4_submit_setup_rcl_surface(struct vc4_job *job, struct drm_vc4_submit_rcl_surface *submit_surf, struct pipe_surface *psurf, bool is_depth, bool is_write) @@ -93,7 +87,7 @@ vc4_submit_setup_rcl_surface(struct vc4_context *vc4, } struct vc4_resource *rsc = vc4_resource(psurf->texture); - submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); submit_surf->offset = surf->offset; if (psurf->texture->nr_samples <= 1) { @@ -124,7 +118,7 @@ vc4_submit_setup_rcl_surface(struct vc4_context *vc4, } static void -vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4, +vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job, struct drm_vc4_submit_rcl_surface *submit_surf, struct pipe_surface *psurf) { @@ -136,7 +130,7 @@ vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4, } struct vc4_resource *rsc = vc4_resource(psurf->texture); - submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); submit_surf->offset = surf->offset; if (psurf->texture->nr_samples <= 1) { @@ -153,7 +147,7 @@ vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4, } static void -vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4, +vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job, struct drm_vc4_submit_rcl_surface *submit_surf, struct pipe_surface *psurf) { @@ -165,7 +159,7 @@ vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4, } struct vc4_resource *rsc = vc4_resource(psurf->texture); - submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); submit_surf->offset = surf->offset; submit_surf->bits = 0; rsc->writes++; @@ -175,60 +169,60 @@ vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4, * Submits the job to the kernel and then reinitializes it. */ void -vc4_job_submit(struct vc4_context *vc4) +vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) { - if (!vc4->needs_flush) + if (!job->needs_flush) return; /* The RCL setup would choke if the draw bounds cause no drawing, so * just drop the drawing if that's the case. */ - if (vc4->draw_max_x <= vc4->draw_min_x || - vc4->draw_max_y <= vc4->draw_min_y) { - vc4_job_reset(vc4); + if (job->draw_max_x <= job->draw_min_x || + job->draw_max_y <= job->draw_min_y) { + vc4_job_reset(job); return; } if (vc4_debug & VC4_DEBUG_CL) { fprintf(stderr, "BCL:\n"); - vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false); + vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false); } - if (cl_offset(&vc4->bcl) > 0) { + if (cl_offset(&job->bcl) > 0) { /* Increment the semaphore indicating that binning is done and * unblocking the render thread. Note that this doesn't act * until the FLUSH completes. */ - cl_ensure_space(&vc4->bcl, 8); - struct vc4_cl_out *bcl = cl_start(&vc4->bcl); + cl_ensure_space(&job->bcl, 8); + struct vc4_cl_out *bcl = cl_start(&job->bcl); cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE); /* The FLUSH caps all of our bin lists with a * VC4_PACKET_RETURN. */ cl_u8(&bcl, VC4_PACKET_FLUSH); - cl_end(&vc4->bcl, bcl); + cl_end(&job->bcl, bcl); } struct drm_vc4_submit_cl submit; memset(&submit, 0, sizeof(submit)); - cl_ensure_space(&vc4->bo_handles, 6 * sizeof(uint32_t)); - cl_ensure_space(&vc4->bo_pointers, 6 * sizeof(struct vc4_bo *)); + cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t)); + cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *)); - vc4_submit_setup_rcl_surface(vc4, &submit.color_read, - vc4->color_read, false, false); - vc4_submit_setup_rcl_render_config_surface(vc4, &submit.color_write, - vc4->color_write); - vc4_submit_setup_rcl_surface(vc4, &submit.zs_read, - vc4->zs_read, true, false); - vc4_submit_setup_rcl_surface(vc4, &submit.zs_write, - vc4->zs_write, true, true); + vc4_submit_setup_rcl_surface(job, &submit.color_read, + job->color_read, false, false); + vc4_submit_setup_rcl_render_config_surface(job, &submit.color_write, + job->color_write); + vc4_submit_setup_rcl_surface(job, &submit.zs_read, + job->zs_read, true, false); + vc4_submit_setup_rcl_surface(job, &submit.zs_write, + job->zs_write, true, true); - vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_color_write, - vc4->msaa_color_write); - vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_zs_write, - vc4->msaa_zs_write); + vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_color_write, + job->msaa_color_write); + vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write, + job->msaa_zs_write); - if (vc4->msaa) { + if (job->msaa) { /* This bit controls how many pixels the general * (i.e. subsampled) loads/stores are iterating over * (multisample loads replicate out to the other samples). @@ -240,29 +234,29 @@ vc4_job_submit(struct vc4_context *vc4) submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X; } - submit.bo_handles = (uintptr_t)vc4->bo_handles.base; - submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4; - submit.bin_cl = (uintptr_t)vc4->bcl.base; - submit.bin_cl_size = cl_offset(&vc4->bcl); - submit.shader_rec = (uintptr_t)vc4->shader_rec.base; - submit.shader_rec_size = cl_offset(&vc4->shader_rec); - submit.shader_rec_count = vc4->shader_rec_count; - submit.uniforms = (uintptr_t)vc4->uniforms.base; - submit.uniforms_size = cl_offset(&vc4->uniforms); - - assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0); - submit.min_x_tile = vc4->draw_min_x / vc4->tile_width; - submit.min_y_tile = vc4->draw_min_y / vc4->tile_height; - submit.max_x_tile = (vc4->draw_max_x - 1) / vc4->tile_width; - submit.max_y_tile = (vc4->draw_max_y - 1) / vc4->tile_height; - submit.width = vc4->draw_width; - submit.height = vc4->draw_height; - if (vc4->cleared) { + submit.bo_handles = (uintptr_t)job->bo_handles.base; + submit.bo_handle_count = cl_offset(&job->bo_handles) / 4; + submit.bin_cl = (uintptr_t)job->bcl.base; + submit.bin_cl_size = cl_offset(&job->bcl); + submit.shader_rec = (uintptr_t)job->shader_rec.base; + submit.shader_rec_size = cl_offset(&job->shader_rec); + submit.shader_rec_count = job->shader_rec_count; + submit.uniforms = (uintptr_t)job->uniforms.base; + submit.uniforms_size = cl_offset(&job->uniforms); + + assert(job->draw_min_x != ~0 && job->draw_min_y != ~0); + submit.min_x_tile = job->draw_min_x / job->tile_width; + submit.min_y_tile = job->draw_min_y / job->tile_height; + submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width; + submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height; + submit.width = job->draw_width; + submit.height = job->draw_height; + if (job->cleared) { submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; - submit.clear_color[0] = vc4->clear_color[0]; - submit.clear_color[1] = vc4->clear_color[1]; - submit.clear_z = vc4->clear_depth; - submit.clear_s = vc4->clear_stencil; + submit.clear_color[0] = job->clear_color[0]; + submit.clear_color[1] = job->clear_color[1]; + submit.clear_z = job->clear_depth; + submit.clear_s = job->clear_stencil; } if (!(vc4_debug & VC4_DEBUG_NORAST)) { @@ -300,5 +294,5 @@ vc4_job_submit(struct vc4_context *vc4) } } - vc4_job_reset(vc4); + vc4_job_reset(vc4->job); } diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index b1189bf..52b938d 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2520,6 +2520,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key, static void vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode) { + struct vc4_job *job = vc4->job; struct vc4_fs_key local_key; struct vc4_fs_key *key = &local_key; @@ -2546,7 +2547,7 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode) } else { key->logicop_func = PIPE_LOGICOP_COPY; } - if (vc4->msaa) { + if (job->msaa) { key->msaa = vc4->rasterizer->base.multisample; key->sample_coverage = (vc4->rasterizer->base.multisample && vc4->sample_mask != (1 << VC4_MAX_SAMPLES) - 1); diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 521ef50..81d3476 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -78,7 +78,8 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec) { struct drm_vc4_submit_cl *args = exec->args; struct vc4_context *vc4 = dev->vc4; - struct vc4_bo **bos = vc4->bo_pointers.base; + struct vc4_job *job = vc4->job; + struct vc4_bo **bos = job->bo_pointers.base; exec->bo_count = args->bo_handle_count; exec->bo = calloc(exec->bo_count, sizeof(void *)); diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index bf211b1..d697f7c 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -406,6 +406,7 @@ vc4_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *framebuffer) { struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_job *job = vc4->job; struct pipe_framebuffer_state *cso = &vc4->framebuffer; unsigned i; @@ -430,14 +431,14 @@ vc4_set_framebuffer_state(struct pipe_context *pctx, struct vc4_resource *rsc = vc4_resource(cso->cbufs[0]->texture); if (!rsc->writes) - vc4->cleared |= PIPE_CLEAR_COLOR0; + job->cleared |= PIPE_CLEAR_COLOR0; } if (cso->zsbuf) { struct vc4_resource *rsc = vc4_resource(cso->zsbuf->texture); if (!rsc->writes) - vc4->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; + job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; } /* Nonzero texture mipmap levels are laid out as if they were in @@ -460,21 +461,21 @@ vc4_set_framebuffer_state(struct pipe_context *pctx, rsc->cpp); } - vc4->msaa = false; + job->msaa = false; if (cso->cbufs[0]) - vc4->msaa = cso->cbufs[0]->texture->nr_samples > 1; + job->msaa = cso->cbufs[0]->texture->nr_samples > 1; else if (cso->zsbuf) - vc4->msaa = cso->zsbuf->texture->nr_samples > 1; + job->msaa = cso->zsbuf->texture->nr_samples > 1; - if (vc4->msaa) { - vc4->tile_width = 32; - vc4->tile_height = 32; + if (job->msaa) { + job->tile_width = 32; + job->tile_height = 32; } else { - vc4->tile_width = 64; - vc4->tile_height = 64; + job->tile_width = 64; + job->tile_height = 64; } - vc4->draw_tiles_x = DIV_ROUND_UP(cso->width, vc4->tile_width); - vc4->draw_tiles_y = DIV_ROUND_UP(cso->height, vc4->tile_height); + job->draw_tiles_x = DIV_ROUND_UP(cso->width, job->tile_width); + job->draw_tiles_y = DIV_ROUND_UP(cso->height, job->tile_height); vc4->dirty |= VC4_DIRTY_FRAMEBUFFER; } diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c index e8cd153..07781b8 100644 --- a/src/gallium/drivers/vc4/vc4_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_uniforms.c @@ -28,7 +28,7 @@ #include "vc4_qir.h" static void -write_texture_p0(struct vc4_context *vc4, +write_texture_p0(struct vc4_job *job, struct vc4_cl_out **uniforms, struct vc4_texture_stateobj *texstate, uint32_t unit) @@ -37,11 +37,11 @@ write_texture_p0(struct vc4_context *vc4, vc4_sampler_view(texstate->textures[unit]); struct vc4_resource *rsc = vc4_resource(sview->base.texture); - cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, sview->texture_p0); + cl_reloc(job, &job->uniforms, uniforms, rsc->bo, sview->texture_p0); } static void -write_texture_p1(struct vc4_context *vc4, +write_texture_p1(struct vc4_job *job, struct vc4_cl_out **uniforms, struct vc4_texture_stateobj *texstate, uint32_t unit) @@ -55,7 +55,7 @@ write_texture_p1(struct vc4_context *vc4, } static void -write_texture_p2(struct vc4_context *vc4, +write_texture_p2(struct vc4_job *job, struct vc4_cl_out **uniforms, struct vc4_texture_stateobj *texstate, uint32_t data) @@ -72,7 +72,7 @@ write_texture_p2(struct vc4_context *vc4, } static void -write_texture_first_level(struct vc4_context *vc4, +write_texture_first_level(struct vc4_job *job, struct vc4_cl_out **uniforms, struct vc4_texture_stateobj *texstate, uint32_t data) @@ -84,7 +84,7 @@ write_texture_first_level(struct vc4_context *vc4, } static void -write_texture_msaa_addr(struct vc4_context *vc4, +write_texture_msaa_addr(struct vc4_job *job, struct vc4_cl_out **uniforms, struct vc4_texture_stateobj *texstate, uint32_t unit) @@ -92,7 +92,7 @@ write_texture_msaa_addr(struct vc4_context *vc4, struct pipe_sampler_view *texture = texstate->textures[unit]; struct vc4_resource *rsc = vc4_resource(texture->texture); - cl_aligned_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, 0); + cl_aligned_reloc(job, &job->uniforms, uniforms, rsc->bo, 0); } @@ -104,7 +104,7 @@ write_texture_msaa_addr(struct vc4_context *vc4, } static void -write_texture_border_color(struct vc4_context *vc4, +write_texture_border_color(struct vc4_job *job, struct vc4_cl_out **uniforms, struct vc4_texture_stateobj *texstate, uint32_t unit) @@ -211,14 +211,15 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, struct vc4_texture_stateobj *texstate) { struct vc4_shader_uniform_info *uinfo = &shader->uniforms; + struct vc4_job *job = vc4->job; const uint32_t *gallium_uniforms = cb->cb[0].user_buffer; struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms); - cl_ensure_space(&vc4->uniforms, (uinfo->count + + cl_ensure_space(&job->uniforms, (uinfo->count + uinfo->num_texture_samples) * 4); struct vc4_cl_out *uniforms = - cl_start_shader_reloc(&vc4->uniforms, + cl_start_shader_reloc(&job->uniforms, uinfo->num_texture_samples); for (int i = 0; i < uinfo->count; i++) { @@ -251,36 +252,36 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, break; case QUNIFORM_TEXTURE_CONFIG_P0: - write_texture_p0(vc4, &uniforms, texstate, + write_texture_p0(job, &uniforms, texstate, uinfo->data[i]); break; case QUNIFORM_TEXTURE_CONFIG_P1: - write_texture_p1(vc4, &uniforms, texstate, + write_texture_p1(job, &uniforms, texstate, uinfo->data[i]); break; case QUNIFORM_TEXTURE_CONFIG_P2: - write_texture_p2(vc4, &uniforms, texstate, + write_texture_p2(job, &uniforms, texstate, uinfo->data[i]); break; case QUNIFORM_TEXTURE_FIRST_LEVEL: - write_texture_first_level(vc4, &uniforms, texstate, + write_texture_first_level(job, &uniforms, texstate, uinfo->data[i]); break; case QUNIFORM_UBO_ADDR: - cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0); + cl_aligned_reloc(job, &job->uniforms, &uniforms, ubo, 0); break; case QUNIFORM_TEXTURE_MSAA_ADDR: - write_texture_msaa_addr(vc4, &uniforms, + write_texture_msaa_addr(job, &uniforms, texstate, uinfo->data[i]); break; case QUNIFORM_TEXTURE_BORDER_COLOR: - write_texture_border_color(vc4, &uniforms, + write_texture_border_color(job, &uniforms, texstate, uinfo->data[i]); break; @@ -355,7 +356,7 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, #endif } - cl_end(&vc4->uniforms, uniforms); + cl_end(&job->uniforms, uniforms); vc4_bo_unreference(&ubo); } -- 2.7.4