From 585baac652ffa172fb3fbbdd4c7559d03b7c27ef Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 6 Jul 2012 03:18:06 +0200 Subject: [PATCH] r600g: do fine-grained vertex buffer updates If only some buffers are changed, the other ones don't have to be re-emitted. This uses bitmasks of enabled and dirty buffers just like emit_constant_buffers does. --- src/gallium/drivers/r600/evergreen_compute.c | 12 +++--- src/gallium/drivers/r600/evergreen_state.c | 23 +++--------- src/gallium/drivers/r600/r600_blit.c | 4 +- src/gallium/drivers/r600/r600_buffer.c | 15 ++++---- src/gallium/drivers/r600/r600_hw_context.c | 5 ++- src/gallium/drivers/r600/r600_pipe.h | 12 +++--- src/gallium/drivers/r600/r600_state.c | 23 ++++++------ src/gallium/drivers/r600/r600_state_common.c | 56 +++++++++++++++++++++++----- 8 files changed, 87 insertions(+), 63 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 947a328..caaa752 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -89,14 +89,15 @@ static void evergreen_cs_set_vertex_buffer( unsigned offset, struct pipe_resource * buffer) { - struct pipe_vertex_buffer *vb = &rctx->cs_vertex_buffer[vb_index]; - struct r600_vertexbuf_state * state = &rctx->cs_vertex_buffer_state; + struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state; + struct pipe_vertex_buffer *vb = &state->vb[vb_index]; vb->stride = 1; vb->buffer_offset = offset; vb->buffer = buffer; vb->user_buffer = NULL; r600_inval_vertex_cache(rctx); + state->enabled_mask |= 1 << vb_index; state->dirty_mask |= 1 << vb_index; r600_atom_dirty(rctx, &state->atom); } @@ -369,7 +370,7 @@ static void compute_emit_cs(struct r600_context *ctx) r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE); /* Emit vertex buffer state */ - ctx->cs_vertex_buffer_state.atom.num_dw = 12 * ctx->nr_cs_vertex_buffers; + 
ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask); r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom); for (i = 0; i < get_compute_resource_num(); i++) { @@ -493,10 +494,8 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_, evergreen_cs_set_vertex_buffer(ctx, vtx_id, buffer->chunk->start_in_dw * 4, resources[i]->base.texture); - ctx->nr_cs_vertex_buffers = vtx_id + 1; } } - } static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_, @@ -740,7 +739,8 @@ void evergreen_init_compute_state_functions(struct r600_context *ctx) /* We always use at least two vertex buffers for compute, one for * parameters and one for global memory */ - ctx->nr_cs_vertex_buffers = 2; + ctx->cs_vertex_buffer_state.enabled_mask = + ctx->cs_vertex_buffer_state.dirty_mask = 1 | 2; } diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 0d2fa30..72ddc0b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1772,8 +1772,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_vertexbuf_state *state, - struct pipe_vertex_buffer *vertex_buffers, - unsigned vb_count, unsigned resource_offset, unsigned pkt_flags) { @@ -1784,13 +1782,11 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct pipe_vertex_buffer *vb; struct r600_resource *rbuffer; uint64_t va; - unsigned buffer_index = ffs(dirty_mask) - 1; + unsigned buffer_index = u_bit_scan(&dirty_mask); - vb = &vertex_buffers[buffer_index]; + vb = &state->vb[buffer_index]; rbuffer = (struct r600_resource*)vb->buffer; - if (!rbuffer) { - goto next; - } + assert(rbuffer); va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b); va += vb->buffer_offset; @@ -1816,26 +1812,19 @@ static void evergreen_emit_vertex_buffers(struct 
r600_context *rctx, r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); - -next: - dirty_mask &= ~(1 << buffer_index); } state->dirty_mask = 0; } static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom) { - evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, - rctx->vertex_buffer, - rctx->nr_vertex_buffers, 992, 0); + evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0); } static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom) { - evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, - rctx->cs_vertex_buffer, - rctx->nr_cs_vertex_buffers, 816, - RADEON_CP_PACKET3_COMPUTE_MODE); + evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816, + RADEON_CP_PACKET3_COMPUTE_MODE); } static void evergreen_emit_constant_buffers(struct r600_context *rctx, diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 98f8b84..ca5aaf8 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -60,8 +60,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op util_blitter_save_viewport(rctx->blitter, &rctx->viewport); } util_blitter_save_vertex_buffers(rctx->blitter, - rctx->nr_vertex_buffers, - rctx->vertex_buffer); + util_last_bit(rctx->vertex_buffer_state.enabled_mask), + rctx->vertex_buffer_state.vb); util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets, (struct pipe_stream_output_target**)rctx->so_targets); diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 8e2deb1..165427e 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -93,7 +93,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, /* Check if mapping this buffer would cause waiting 
for the GPU. */ if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) { - unsigned i; + unsigned i, mask; /* Discard the buffer. */ pb_reference(&rbuffer->buf, NULL); @@ -105,13 +105,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, /* We changed the buffer, now we need to bind it where the old one was bound. */ /* Vertex buffers. */ - for (i = 0; i < rctx->nr_vertex_buffers; i++) { - if (rctx->vertex_buffer[i].buffer == &rbuffer->b.b) { - struct r600_vertexbuf_state * state = - &rctx->vertex_buffer_state; - state->dirty_mask |= 1 << i; - r600_inval_vertex_cache(rctx); - r600_atom_dirty(rctx, &state->atom); + mask = rctx->vertex_buffer_state.enabled_mask; + while (mask) { + i = u_bit_scan(&mask); + if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) { + rctx->vertex_buffer_state.dirty_mask |= 1 << i; + r600_vertex_buffers_dirty(rctx); } } /* Streamout buffers. */ diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 2951b86..d0a5918 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -1274,14 +1274,15 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) r600_emit_atom(ctx, &ctx->start_cs_cmd.atom); /* Invalidate caches. */ - r600_inval_vertex_cache(ctx); r600_inval_texture_cache(ctx); r600_flush_framebuffer(ctx, false); /* Re-emit states. 
*/ r600_atom_dirty(ctx, &ctx->cb_misc_state.atom); r600_atom_dirty(ctx, &ctx->db_misc_state.atom); - r600_atom_dirty(ctx, &ctx->vertex_buffer_state.atom); + + ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask; + r600_vertex_buffers_dirty(ctx); ctx->vs_constbuf_state.dirty_mask = ctx->vs_constbuf_state.enabled_mask; ctx->ps_constbuf_state.dirty_mask = ctx->ps_constbuf_state.enabled_mask; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6449a4d..200f0a2 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -278,6 +278,8 @@ struct r600_constbuf_state struct r600_vertexbuf_state { struct r600_atom atom; + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + uint32_t enabled_mask; /* non-NULL buffers */ uint32_t dirty_mask; }; @@ -399,13 +401,8 @@ struct r600_context { boolean dual_src_blend; - /* Vertex and index buffers. */ - bool vertex_buffers_dirty; + /* Index buffer. */ struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_buffers; - struct pipe_vertex_buffer cs_vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nr_cs_vertex_buffers; }; static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) @@ -528,8 +525,9 @@ unsigned r600_get_cb_flush_flags(struct r600_context *rctx); void r600_texture_barrier(struct pipe_context *ctx); void r600_set_index_buffer(struct pipe_context *ctx, const struct pipe_index_buffer *ib); +void r600_vertex_buffers_dirty(struct r600_context *rctx); void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers); + const struct pipe_vertex_buffer *input); void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3d5835c..4f475b3 100644 --- 
a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1748,27 +1748,28 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->cs; - struct pipe_vertex_buffer *vb = rctx->vertex_buffer; - unsigned count = rctx->nr_vertex_buffers; - unsigned i, offset; + uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask; - for (i = 0; i < count; i++) { - struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer; + while (dirty_mask) { + struct pipe_vertex_buffer *vb; + struct r600_resource *rbuffer; + unsigned offset; + unsigned buffer_index = u_bit_scan(&dirty_mask); - if (!rbuffer) { - continue; - } + vb = &rctx->vertex_buffer_state.vb[buffer_index]; + rbuffer = (struct r600_resource*)vb->buffer; + assert(rbuffer); - offset = vb[i].buffer_offset; + offset = vb->buffer_offset; /* fetch resources start at index 320 */ r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); - r600_write_value(cs, (320 + i) * 7); + r600_write_value(cs, (320 + buffer_index) * 7); r600_write_value(cs, offset); /* RESOURCEi_WORD0 */ r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */ r600_write_value(cs, /* RESOURCEi_WORD2 */ S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_038008_STRIDE(vb[i].stride)); + S_038008_STRIDE(vb->stride)); r600_write_value(cs, 0); /* RESOURCEi_WORD3 */ r600_write_value(cs, 0); /* RESOURCEi_WORD4 */ r600_write_value(cs, 0); /* RESOURCEi_WORD5 */ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 3c42a44..4fa2699 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -403,22 +403,58 @@ void r600_set_index_buffer(struct pipe_context *ctx, } } +void r600_vertex_buffers_dirty(struct r600_context *rctx) +{ + if 
(rctx->vertex_buffer_state.dirty_mask) { + r600_inval_vertex_cache(rctx); + rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) * + util_bitcount(rctx->vertex_buffer_state.dirty_mask); + r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom); + } +} + void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) + const struct pipe_vertex_buffer *input) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_vertexbuf_state * state = &rctx->vertex_buffer_state; + struct r600_vertexbuf_state *state = &rctx->vertex_buffer_state; + struct pipe_vertex_buffer *vb = state->vb; unsigned i; + /* This sets 1-bit for buffers with index >= count. */ + uint32_t disable_mask = ~((1ull << count) - 1); + /* These are the new buffers set by this function. */ + uint32_t new_buffer_mask = 0; + + /* Set buffers with index >= count to NULL. */ + uint32_t remaining_buffers_mask = + rctx->vertex_buffer_state.enabled_mask & disable_mask; + + while (remaining_buffers_mask) { + i = u_bit_scan(&remaining_buffers_mask); + pipe_resource_reference(&vb[i].buffer, NULL); + } - util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, count); + /* Set vertex buffers. */ + for (i = 0; i < count; i++) { + if (memcmp(&input[i], &vb[i], sizeof(struct pipe_vertex_buffer))) { + if (input[i].buffer) { + vb[i].stride = input[i].stride; + vb[i].buffer_offset = input[i].buffer_offset; + pipe_resource_reference(&vb[i].buffer, input[i].buffer); + new_buffer_mask |= 1 << i; + } else { + pipe_resource_reference(&vb[i].buffer, NULL); + disable_mask |= 1 << i; + } + } + } - r600_inval_vertex_cache(rctx); - state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 
12 : 11) * - rctx->nr_vertex_buffers; - for (i = 0 ; i < rctx->nr_vertex_buffers; i++) { - state->dirty_mask |= 1 << i; - } - r600_atom_dirty(rctx, &state->atom); + rctx->vertex_buffer_state.enabled_mask &= ~disable_mask; + rctx->vertex_buffer_state.dirty_mask &= rctx->vertex_buffer_state.enabled_mask; + rctx->vertex_buffer_state.enabled_mask |= new_buffer_mask; + rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask; + + r600_vertex_buffers_dirty(rctx); } void *r600_create_vertex_elements(struct pipe_context *ctx, -- 2.7.4