From: Marek Olšák Date: Sun, 1 Apr 2012 20:03:15 +0000 (+0200) Subject: r600g: rework state emission of constant buffers X-Git-Tag: 062012170305~950 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=68bbfc1afe210d82acfb14a78b0fd8c436a8f78c;p=profile%2Fivi%2Fmesa.git r600g: rework state emission of constant buffers Framerate in ipers: before: 43.6 FPS after: 46.6 FPS Reviewed-by: Alex Deucher --- diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 010b5f3..2ab29c9 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -63,10 +63,6 @@ static const struct r600_reg evergreen_context_reg_list[] = { {GROUP_FORCE_NEW_BLOCK, 0, 0}, {R_028058_DB_DEPTH_SIZE, 0, 0}, {R_02805C_DB_DEPTH_SLICE, 0, 0}, - {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0}, - {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0}, - {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0}, - {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0}, {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, @@ -193,10 +189,6 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_028860_SQ_PGM_RESOURCES_VS, 0, 0}, {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0}, {R_0288EC_SQ_LDS_ALLOC_PS, 0, 0}, - {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0}, - {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0}, - {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0}, - {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0}, {R_028A00_PA_SU_POINT_SIZE, 0, 0}, {R_028A04_PA_SU_POINT_MINMAX, 0, 0}, {R_028A08_PA_SU_LINE_CNTL, 0, 0}, @@ -331,10 +323,6 @@ static const struct r600_reg cayman_context_reg_list[] = { {GROUP_FORCE_NEW_BLOCK, 0, 0}, {R_028058_DB_DEPTH_SIZE, 0, 0}, {R_02805C_DB_DEPTH_SLICE, 0, 0}, - {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 
REG_FLAG_DIRTY_ALWAYS, 0},
-	{R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0},
-	{R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0},
-	{R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0},
 	{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
 	{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
 	{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
@@ -468,10 +456,6 @@ static const struct r600_reg cayman_context_reg_list[] = {
 	{R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0},
 	{R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0},
 	{R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0},
-	{R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0},
-	{R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0},
-	{R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0},
-	{R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0},
 	{R_028A00_PA_SU_POINT_SIZE, 0, 0},
 	{R_028A04_PA_SU_POINT_MINMAX, 0, 0},
 	{R_028A08_PA_SU_LINE_CNTL, 0, 0},
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 00d2d0d..e97f8e3 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1763,11 +1763,82 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600
 	}
 }
 
+static void evergreen_emit_constant_buffer(struct r600_context *rctx,
+					   struct r600_constbuf_state *state,
+					   unsigned buffer_id_base,
+					   unsigned reg_alu_constbuf_size,
+					   unsigned reg_alu_const_cache)
+{
+	struct radeon_winsys_cs *cs = rctx->cs;
+	uint32_t dirty_mask = state->dirty_mask;	/* local copy; bits are cleared as slots are emitted */
+	/*
+	 * Emit the state of every constant buffer whose bit is set in
+	 * state->dirty_mask into rctx->cs, lowest slot index first, and
+	 * then reset state->dirty_mask to 0.
+	 *
+	 *   rctx                   context whose command stream is written
+	 *   state                  per-stage bindings (cb[] slots + dirty_mask)
+	 *   buffer_id_base         added to the slot index to form the resource
+	 *                          id used in the SET_RESOURCE packet
+	 *   reg_alu_constbuf_size  size register of slot 0; slot i uses +i*4
+	 *   reg_alu_const_cache    address register of slot 0; slot i uses +i*4
+	 *
+	 * Per dirty slot this writes two context registers, a NOP packet
+	 * carrying a relocation, a SET_RESOURCE packet with the resource id
+	 * and RESOURCEi_WORD0..7, and a second NOP packet with a relocation.
+	 */
+	while (dirty_mask) {
+		struct r600_constant_buffer *cb;
+		struct r600_resource *rbuffer;
+		uint64_t va;
+		unsigned buffer_index = ffs(dirty_mask) - 1;	/* ffs() is 1-based: index of the lowest set bit */
+
+		cb = &state->cb[buffer_index];
+		rbuffer = (struct r600_resource*)cb->buffer;
+		assert(rbuffer);	/* a slot flagged dirty is expected to have a buffer bound */
+
+		va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b.b);	/* presumably the buffer's GPU virtual address -- confirm */
+		va += cb->buffer_offset;
+		/* buffer_size >> 4 converts bytes to 16-byte units.  ALIGN_DIVUP is
+		 * defined outside this chunk; presumably a round-up division by
+		 * 16 -- confirm. */
+		r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
+				       ALIGN_DIVUP(cb->buffer_size >> 4, 16));
+		r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, va >> 8);	/* low 8 address bits dropped; assumes va is 256-byte aligned -- TODO confirm */
+		/* relocation for rbuffer (read usage); presumably tied to the register write above -- confirm */
+		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		/* resource descriptor for the same buffer range */
+		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+		r600_write_value(cs, (buffer_id_base + buffer_index) * 8);	/* 8 = number of RESOURCEi_WORDs written below per resource */
+		r600_write_value(cs, va); /* RESOURCEi_WORD0 */	/* NOTE(review): va is 64-bit; presumably only the low 32 bits are taken here, the high part goes into WORD2 */
+		r600_write_value(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */	/* underlying buffer size minus the bound offset, minus one */
+		r600_write_value(cs, /* RESOURCEi_WORD2 */
+				 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
+				 S_030008_STRIDE(16) |
+				 S_030008_BASE_ADDRESS_HI(va >> 32UL));
+		r600_write_value(cs, /* RESOURCEi_WORD3 */
+				 S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+				 S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+				 S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+				 S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
+		r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
+		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */	/* NOTE(review): meaning of this constant is not shown in this chunk */
+		/* second relocation for rbuffer; presumably for the SET_RESOURCE packet -- confirm */
+		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		/* NOTE(review): `1` is a signed int; if slot 31 can ever be dirty this
+		 * shift is undefined behaviour -- `1u << buffer_index` would be safer. */
+		dirty_mask &= ~(1 << buffer_index);
+	}
+	state->dirty_mask = 0;	/* every slot that was dirty has been emitted */
+}
+/* Emit callback registered on vs_constbuf_state.atom: forwards the
+ * vertex-shader constant-buffer state with resource-id base 176 and the
+ * VS_0 size/cache registers.  `atom` is not used in the body. */
+static void evergreen_emit_vs_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+	evergreen_emit_constant_buffer(rctx, &rctx->vs_constbuf_state, 176,
+				       R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+				       R_028980_ALU_CONST_CACHE_VS_0);
+}
+/* Emit callback registered on ps_constbuf_state.atom: forwards the
+ * fragment-shader constant-buffer state with resource-id base 0 and the
+ * PS_0 size/cache registers.  `atom` is not used in the body. */
+static void evergreen_emit_ps_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+	evergreen_emit_constant_buffer(rctx, &rctx->ps_constbuf_state, 0,
+				       R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+				       R_028940_ALU_CONST_CACHE_PS_0);
+}
+
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
 	r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 6, 0);
 	r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	r600_init_atom(&rctx->vertex_buffer_state,
evergreen_emit_vertex_buffers, 0, 0); + r600_init_atom(&rctx->vs_constbuf_state.atom, evergreen_emit_vs_constant_buffer, 0, 0); + r600_init_atom(&rctx->ps_constbuf_state.atom, evergreen_emit_ps_constant_buffer, 0, 0); rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 29e4d23..1717295 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -335,14 +335,6 @@ static const struct r600_reg r600_context_reg_list[] = { {R_028124_CB_CLEAR_GREEN, 0, 0}, {R_028128_CB_CLEAR_BLUE, 0, 0}, {R_02812C_CB_CLEAR_ALPHA, 0, 0}, - {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0}, - {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0}, - {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0}, - {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0}, - {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0}, - {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0}, - {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0}, - {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0}, {R_02823C_CB_SHADER_MASK, 0, 0}, {R_028238_CB_TARGET_MASK, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0}, @@ -1235,6 +1227,11 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) r600_atom_dirty(ctx, &ctx->db_misc_state.atom); r600_atom_dirty(ctx, &ctx->vertex_buffer_state); + ctx->vs_constbuf_state.dirty_mask = ctx->vs_constbuf_state.enabled_mask; + ctx->ps_constbuf_state.dirty_mask = ctx->ps_constbuf_state.enabled_mask; + r600_constant_buffers_dirty(ctx, &ctx->vs_constbuf_state); + r600_constant_buffers_dirty(ctx, &ctx->ps_constbuf_state); + if (streamout_suspended) { ctx->streamout_start = TRUE; ctx->streamout_append_bitmask = ~0; diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 6de33cb..21bb535 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -225,6 +225,21 @@ struct r600_stencil_ref
 	ubyte writemask[2];
 };
 
+struct r600_constant_buffer	/* one bound constant-buffer slot */
+{
+	struct pipe_resource *buffer;	/* bound resource; referenced/unreferenced by r600_set_constant_buffer() */
+	unsigned buffer_offset;	/* offset obtained from r600_upload_const_buffer(); the emit functions add it to the address */
+	unsigned buffer_size;	/* set from buffer->width0; emitted as ALIGN_DIVUP(buffer_size >> 4, 16) */
+};
+/* Constant-buffer bindings of one shader stage together with the state
+ * atom that emits them.  Bit i of each mask refers to cb[i]. */
+struct r600_constbuf_state
+{
+	struct r600_atom atom;	/* initialised with the stage's emit callback; num_dw is recomputed in r600_constant_buffers_dirty() */
+	struct r600_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];	/* indexed by constant-buffer slot */
+	uint32_t enabled_mask;	/* set when a slot is bound, cleared when it is unbound */
+	uint32_t dirty_mask;	/* slots still to be emitted; reset to 0 by the emit functions */
+};
+
 struct r600_context {
 	struct pipe_context context;
 	struct blitter_context *blitter;
@@ -248,10 +263,6 @@ struct r600_context {
 	struct pipe_clip_state clip;
 	struct r600_pipe_shader *ps_shader;
 	struct r600_pipe_shader *vs_shader;
-	struct r600_pipe_state vs_const_buffer;
-	struct r600_pipe_resource_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
-	struct r600_pipe_state ps_const_buffer;
-	struct r600_pipe_resource_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
 	struct r600_pipe_rasterizer *rasterizer;
 	struct r600_pipe_state vgt;
 	struct r600_pipe_state spi;
@@ -282,9 +293,9 @@ struct r600_context {
 	struct r600_atom r6xx_flush_and_inv_cmd;
 	struct r600_db_misc_state db_misc_state;
 	struct r600_atom vertex_buffer_state;
+	struct r600_constbuf_state vs_constbuf_state;	/* selected for PIPE_SHADER_VERTEX in r600_set_constant_buffer() */
+	struct r600_constbuf_state ps_constbuf_state;	/* selected for PIPE_SHADER_FRAGMENT in r600_set_constant_buffer() */
 
-	/* Below are variables from the old r600_context.
-	 */
 	struct radeon_winsys_cs *cs;
 
 	struct r600_range *range;
@@ -488,6 +499,7 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
 void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
 void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
 void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
+void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);	/* recompute the atom size from dirty_mask and mark the atom dirty */
 void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 			      struct pipe_resource *buffer);
 struct pipe_stream_output_target *
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 96df9cb..3a304d3 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1733,11 +1733,75 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 	}
 }
 
+static void r600_emit_constant_buffers(struct r600_context *rctx,
+				       struct r600_constbuf_state *state,
+				       unsigned buffer_id_base,
+				       unsigned reg_alu_constbuf_size,
+				       unsigned reg_alu_const_cache)
+{
+	struct radeon_winsys_cs *cs = rctx->cs;
+	uint32_t dirty_mask = state->dirty_mask;	/* local copy; bits are cleared as slots are emitted */
+	/*
+	 * Emit the state of every constant buffer whose bit is set in
+	 * state->dirty_mask into rctx->cs, lowest slot index first, and
+	 * then reset state->dirty_mask to 0.
+	 *
+	 *   rctx                   context whose command stream is written
+	 *   state                  per-stage bindings (cb[] slots + dirty_mask)
+	 *   buffer_id_base         added to the slot index to form the resource
+	 *                          id used in the SET_RESOURCE packet
+	 *   reg_alu_constbuf_size  size register of slot 0; slot i uses +i*4
+	 *   reg_alu_const_cache    address register of slot 0; slot i uses +i*4
+	 *
+	 * Per dirty slot this writes two context registers, a NOP packet
+	 * carrying a relocation, a SET_RESOURCE packet with the resource id
+	 * and RESOURCEi_WORD0..6, and a second NOP packet with a relocation.
+	 * Unlike evergreen_emit_constant_buffer() only the byte offset is
+	 * written here, no 64-bit address is computed.
+	 */
+	while (dirty_mask) {
+		struct r600_constant_buffer *cb;
+		struct r600_resource *rbuffer;
+		unsigned offset;
+		unsigned buffer_index = ffs(dirty_mask) - 1;	/* ffs() is 1-based: index of the lowest set bit */
+
+		cb = &state->cb[buffer_index];
+		rbuffer = (struct r600_resource*)cb->buffer;
+		assert(rbuffer);	/* a slot flagged dirty is expected to have a buffer bound */
+
+		offset = cb->buffer_offset;
+		/* buffer_size >> 4 converts bytes to 16-byte units.  ALIGN_DIVUP is
+		 * defined outside this chunk; presumably a round-up division by
+		 * 16 -- confirm. */
+		r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
+				       ALIGN_DIVUP(cb->buffer_size >> 4, 16));
+		r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);	/* NOTE(review): only the offset is written; presumably the relocation below supplies the buffer base -- confirm */
+		/* relocation for rbuffer (read usage); presumably tied to the register write above -- confirm */
+		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		/* resource descriptor for the same buffer range */
+		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
+		r600_write_value(cs, (buffer_id_base + buffer_index) * 7);	/* 7 = number of RESOURCEi_WORDs written below per resource */
+		r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
+		r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */	/* underlying buffer size minus the bound offset, minus one */
+		r600_write_value(cs, /* RESOURCEi_WORD2 */
+				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
+				 S_038008_STRIDE(16));
+		r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+		r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+		r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */	/* NOTE(review): meaning of this constant is not shown in this chunk */
+		/* second relocation for rbuffer; presumably for the SET_RESOURCE packet -- confirm */
+		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+		/* NOTE(review): `1` is a signed int; if slot 31 can ever be dirty this
+		 * shift is undefined behaviour -- `1u << buffer_index` would be safer. */
+		dirty_mask &= ~(1 << buffer_index);
+	}
+	state->dirty_mask = 0;	/* every slot that was dirty has been emitted */
+}
+/* Emit callback registered on vs_constbuf_state.atom: forwards the
+ * vertex-shader constant-buffer state with resource-id base 160 and the
+ * VS_0 size/cache registers.  `atom` is not used in the body. */
+static void r600_emit_vs_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+	r600_emit_constant_buffers(rctx, &rctx->vs_constbuf_state, 160,
+				   R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+				   R_028980_ALU_CONST_CACHE_VS_0);
+}
+/* Emit callback registered on ps_constbuf_state.atom: forwards the
+ * fragment-shader constant-buffer state with resource-id base 0 and the
+ * PS_0 size/cache registers.  `atom` is not used in the body. */
+static void r600_emit_ps_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+	r600_emit_constant_buffers(rctx, &rctx->ps_constbuf_state, 0,
+				   R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+				   R_028940_ALU_CONST_CACHE_PS_0);
+}
+
 void r600_init_state_functions(struct r600_context *rctx)
 {
 	r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, 0);
 	r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	r600_init_atom(&rctx->vertex_buffer_state, r600_emit_vertex_buffers, 0, 0);
+	r600_init_atom(&rctx->vs_constbuf_state.atom, r600_emit_vs_constant_buffer, 0, 0);
+	r600_init_atom(&rctx->ps_constbuf_state.atom, r600_emit_ps_constant_buffer, 0, 0);
 
 	rctx->context.create_blend_state = r600_create_blend_state;
 	rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 61f59f7..3c93f49 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -519,87 +519,54 @@ static void r600_update_alpha_ref(struct r600_context *rctx)
 	rctx->alpha_ref_dirty = false;
 }
 
+void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
+{	/*
+	 * Recompute state->atom.num_dw from the number of bits set in
+	 * state->dirty_mask and mark the atom dirty.  Callers in this patch
+	 * update dirty_mask before calling.  The per-buffer factor is 20
+	 * when chip_class >= EVERGREEN and 19 otherwise; presumably the
+	 * number of dwords the matching emit function writes per buffer --
+	 * confirm against r600_write_context_reg().
+	 */
+	state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
+							   : util_bitcount(state->dirty_mask)*19;
+	r600_atom_dirty(rctx, &state->atom);
+}
+
void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, struct pipe_resource *buffer) { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_resource *rbuffer = r600_resource(buffer); - struct r600_pipe_resource_state *rstate; - uint64_t va_offset; + struct r600_constbuf_state *state; + struct r600_constant_buffer *cb; uint32_t offset; + switch (shader) { + case PIPE_SHADER_VERTEX: + state = &rctx->vs_constbuf_state; + break; + case PIPE_SHADER_FRAGMENT: + state = &rctx->ps_constbuf_state; + break; + default: + return; + } + /* Note that the state tracker can unbind constant buffers by * passing NULL here. */ if (buffer == NULL) { + state->enabled_mask &= ~(1 << index); + state->dirty_mask &= ~(1 << index); + pipe_resource_reference(&state->cb[index].buffer, NULL); return; } r600_inval_shader_cache(rctx); - r600_upload_const_buffer(rctx, &rbuffer, &offset); - va_offset = r600_resource_va(ctx->screen, (void*)rbuffer); - va_offset += offset; - va_offset >>= 8; - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0 + index * 4, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - NULL, 0); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0 + index * 4, - va_offset, rbuffer, RADEON_USAGE_READ); - r600_context_pipe_state_set(rctx, &rctx->vs_const_buffer); - - rstate = &rctx->vs_const_buffer_resource[index]; - if (!rstate->id) { - if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_init_buffer_resource(rctx, rstate); - } else { - r600_pipe_init_buffer_resource(rctx, rstate); - } - } + cb = &state->cb[index]; +
pipe_resource_reference(&cb->buffer, &rbuffer->b.b.b); + cb->buffer_offset = offset; + cb->buffer_size = buffer->width0; - if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ); - } else { - r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); - } - r600_context_pipe_state_set_vs_resource(rctx, rstate, index); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - NULL, 0); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - va_offset, rbuffer, RADEON_USAGE_READ); - r600_context_pipe_state_set(rctx, &rctx->ps_const_buffer); - - rstate = &rctx->ps_const_buffer_resource[index]; - if (!rstate->id) { - if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_init_buffer_resource(rctx, rstate); - } else { - r600_pipe_init_buffer_resource(rctx, rstate); - } - } - if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ); - } else { - r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); - } - r600_context_pipe_state_set_ps_resource(rctx, rstate, index); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } + state->enabled_mask |= 1 << index; + state->dirty_mask |= 1 << index; + r600_constant_buffers_dirty(rctx, state); if (buffer != &rbuffer->b.b.b) pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);