r600g: rework state emission of constant buffers
author Marek Olšák <maraeo@gmail.com>
Sun, 1 Apr 2012 20:03:15 +0000 (22:03 +0200)
committer Marek Olšák <maraeo@gmail.com>
Wed, 4 Apr 2012 11:09:47 +0000 (13:09 +0200)
Framerate in ipers:
  before: 43.6 FPS
  after:  46.6 FPS

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/r600/evergreen_hw_context.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index 010b5f3..2ab29c9 100644 (file)
@@ -63,10 +63,6 @@ static const struct r600_reg evergreen_context_reg_list[] = {
        {GROUP_FORCE_NEW_BLOCK, 0, 0},
        {R_028058_DB_DEPTH_SIZE, 0, 0},
        {R_02805C_DB_DEPTH_SLICE, 0, 0},
-       {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0},
        {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
        {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
        {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
@@ -193,10 +189,6 @@ static const struct r600_reg evergreen_context_reg_list[] = {
        {R_028860_SQ_PGM_RESOURCES_VS, 0, 0},
        {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0},
        {R_0288EC_SQ_LDS_ALLOC_PS, 0, 0},
-       {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0},
-       {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0},
-       {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0},
-       {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0},
        {R_028A00_PA_SU_POINT_SIZE, 0, 0},
        {R_028A04_PA_SU_POINT_MINMAX, 0, 0},
        {R_028A08_PA_SU_LINE_CNTL, 0, 0},
@@ -331,10 +323,6 @@ static const struct r600_reg cayman_context_reg_list[] = {
        {GROUP_FORCE_NEW_BLOCK, 0, 0},
        {R_028058_DB_DEPTH_SIZE, 0, 0},
        {R_02805C_DB_DEPTH_SLICE, 0, 0},
-       {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0},
        {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
        {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
        {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
@@ -468,10 +456,6 @@ static const struct r600_reg cayman_context_reg_list[] = {
        {R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0},
        {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0},
        {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0},
-       {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0},
-       {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0},
-       {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0},
-       {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0},
        {R_028A00_PA_SU_POINT_SIZE, 0, 0},
        {R_028A04_PA_SU_POINT_MINMAX, 0, 0},
        {R_028A08_PA_SU_LINE_CNTL, 0, 0},
index 00d2d0d..e97f8e3 100644 (file)
@@ -1763,11 +1763,82 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600
        }
 }
 
+/* Emit every constant buffer flagged in state->dirty_mask (Evergreen path).
+ *
+ * For each dirty slot this writes the slot's ALU_CONST_BUFFER_SIZE and
+ * ALU_CONST_CACHE context registers and then a SET_RESOURCE packet with
+ * eight resource words describing the buffer.  The cache register write and
+ * the resource packet are each followed by a NOP packet whose payload is the
+ * value returned by r600_context_bo_reloc() for the buffer.
+ *
+ * rctx:                  context whose command stream (rctx->cs) is written.
+ * state:                 constant buffer state to emit; its dirty_mask is
+ *                        cleared before returning.
+ * buffer_id_base:        added to the slot index to form the resource id.
+ * reg_alu_constbuf_size: size register of slot 0; slot N uses + N * 4.
+ * reg_alu_const_cache:   cache register of slot 0; slot N uses + N * 4.
+ */
+static void evergreen_emit_constant_buffer(struct r600_context *rctx,
+                                          struct r600_constbuf_state *state,
+                                          unsigned buffer_id_base,
+                                          unsigned reg_alu_constbuf_size,
+                                          unsigned reg_alu_const_cache)
+{
+       struct radeon_winsys_cs *cs = rctx->cs;
+       uint32_t dirty_mask = state->dirty_mask;
+
+       while (dirty_mask) {
+               struct r600_constant_buffer *cb;
+               struct r600_resource *rbuffer;
+               uint64_t va;
+               /* ffs() is 1-based, so this is the lowest dirty slot. */
+               unsigned buffer_index = ffs(dirty_mask) - 1;
+
+               cb = &state->cb[buffer_index];
+               rbuffer = (struct r600_resource*)cb->buffer;
+               /* A dirty slot must always have a buffer bound. */
+               assert(rbuffer);
+
+               va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b.b);
+               va += cb->buffer_offset;
+
+               r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
+                                      ALIGN_DIVUP(cb->buffer_size >> 4, 16));
+               /* The cache register takes the address shifted right by 8. */
+               r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, va >> 8);
+
+               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+               r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+
+               r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+               r600_write_value(cs, (buffer_id_base + buffer_index) * 8);
+               r600_write_value(cs, va); /* RESOURCEi_WORD0 */
+               r600_write_value(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+               r600_write_value(cs, /* RESOURCEi_WORD2 */
+                                S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
+                                S_030008_STRIDE(16) |
+                                S_030008_BASE_ADDRESS_HI(va >> 32UL));
+               r600_write_value(cs, /* RESOURCEi_WORD3 */
+                                S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+                                S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+                                S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+                                S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
+               r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+               r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+               r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
+               r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
+
+               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+               r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+
+               /* Unsigned constant: shifting a signed 1 into bit 31 is
+                * undefined behaviour, and the mask is 32 bits wide. */
+               dirty_mask &= ~(1u << buffer_index);
+       }
+       state->dirty_mask = 0;
+}
+
+/* Atom emit callback for the vertex shader constant buffers: emits
+ * rctx->vs_constbuf_state with resource id base 176 and the VS slot-0
+ * size/cache registers.  The atom argument is not used.
+ */
+static void evergreen_emit_vs_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+       struct r600_constbuf_state *vs_state = &rctx->vs_constbuf_state;
+
+       evergreen_emit_constant_buffer(rctx, vs_state, 176,
+                                      R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+                                      R_028980_ALU_CONST_CACHE_VS_0);
+}
+
+/* Atom emit callback for the fragment shader constant buffers: emits
+ * rctx->ps_constbuf_state with resource id base 0 and the PS slot-0
+ * size/cache registers.  The atom argument is not used.
+ */
+static void evergreen_emit_ps_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+       struct r600_constbuf_state *ps_state = &rctx->ps_constbuf_state;
+
+       evergreen_emit_constant_buffer(rctx, ps_state, 0,
+                                      R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+                                      R_028940_ALU_CONST_CACHE_PS_0);
+}
+
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
        r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 6, 0);
        r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
        r600_init_atom(&rctx->vertex_buffer_state, evergreen_emit_vertex_buffers, 0, 0);
+       r600_init_atom(&rctx->vs_constbuf_state.atom, evergreen_emit_vs_constant_buffer, 0, 0);
+       r600_init_atom(&rctx->ps_constbuf_state.atom, evergreen_emit_ps_constant_buffer, 0, 0);
 
        rctx->context.create_blend_state = evergreen_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
index 29e4d23..1717295 100644 (file)
@@ -335,14 +335,6 @@ static const struct r600_reg r600_context_reg_list[] = {
        {R_028124_CB_CLEAR_GREEN, 0, 0},
        {R_028128_CB_CLEAR_BLUE, 0, 0},
        {R_02812C_CB_CLEAR_ALPHA, 0, 0},
-       {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0},
-       {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0},
-       {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0},
-       {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0},
-       {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0},
        {R_02823C_CB_SHADER_MASK, 0, 0},
        {R_028238_CB_TARGET_MASK, 0, 0},
        {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0},
@@ -1235,6 +1227,11 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
        r600_atom_dirty(ctx, &ctx->vertex_buffer_state);
 
+       ctx->vs_constbuf_state.dirty_mask = ctx->vs_constbuf_state.enabled_mask;
+       ctx->ps_constbuf_state.dirty_mask = ctx->ps_constbuf_state.enabled_mask;
+       r600_constant_buffers_dirty(ctx, &ctx->vs_constbuf_state);
+       r600_constant_buffers_dirty(ctx, &ctx->ps_constbuf_state);
+
        if (streamout_suspended) {
                ctx->streamout_start = TRUE;
                ctx->streamout_append_bitmask = ~0;
index 6de33cb..21bb535 100644 (file)
@@ -225,6 +225,21 @@ struct r600_stencil_ref
        ubyte writemask[2];
 };
 
+/* One constant buffer slot as recorded by r600_set_constant_buffer(). */
+struct r600_constant_buffer
+{
+       /* Bound buffer; held via pipe_resource_reference(). */
+       struct pipe_resource            *buffer;
+       /* Offset produced by r600_upload_const_buffer(); added to the
+        * buffer address when the slot is emitted. */
+       unsigned                        buffer_offset;
+       /* Copied from width0 of the buffer passed to
+        * r600_set_constant_buffer(). */
+       unsigned                        buffer_size;
+};
+
+/* Constant buffer state of one shader stage, emitted through its atom. */
+struct r600_constbuf_state
+{
+       /* Atom whose num_dw is recomputed by r600_constant_buffers_dirty(). */
+       struct r600_atom                atom;
+       /* Per-slot buffer bindings, indexed by constant buffer index. */
+       struct r600_constant_buffer     cb[PIPE_MAX_CONSTANT_BUFFERS];
+       /* Bit i is set while slot i has a buffer bound. */
+       uint32_t                        enabled_mask;
+       /* Bit i is set while slot i still has to be emitted; cleared by the
+        * emit functions and reset to enabled_mask on context flush. */
+       uint32_t                        dirty_mask;
+};
+
 struct r600_context {
        struct pipe_context             context;
        struct blitter_context          *blitter;
@@ -248,10 +263,6 @@ struct r600_context {
        struct pipe_clip_state          clip;
        struct r600_pipe_shader         *ps_shader;
        struct r600_pipe_shader         *vs_shader;
-       struct r600_pipe_state          vs_const_buffer;
-       struct r600_pipe_resource_state         vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
-       struct r600_pipe_state          ps_const_buffer;
-       struct r600_pipe_resource_state         ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
        struct r600_pipe_rasterizer     *rasterizer;
        struct r600_pipe_state          vgt;
        struct r600_pipe_state          spi;
@@ -282,9 +293,9 @@ struct r600_context {
        struct r600_atom                r6xx_flush_and_inv_cmd;
        struct r600_db_misc_state       db_misc_state;
        struct r600_atom                vertex_buffer_state;
+       struct r600_constbuf_state      vs_constbuf_state;
+       struct r600_constbuf_state      ps_constbuf_state;
 
-       /* Below are variables from the old r600_context.
-        */
        struct radeon_winsys_cs *cs;
 
        struct r600_range       *range;
@@ -488,6 +499,7 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
 void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
 void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
 void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
+void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
 void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                              struct pipe_resource *buffer);
 struct pipe_stream_output_target *
index 96df9cb..3a304d3 100644 (file)
@@ -1733,11 +1733,75 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
        }
 }
 
+/* Emit every constant buffer flagged in state->dirty_mask (R600 path).
+ *
+ * For each dirty slot this writes the slot's ALU_CONST_BUFFER_SIZE and
+ * ALU_CONST_CACHE context registers and then a SET_RESOURCE packet with
+ * seven resource words describing the buffer.  The cache register write and
+ * the resource packet are each followed by a NOP packet whose payload is the
+ * value returned by r600_context_bo_reloc() for the buffer.
+ *
+ * rctx:                  context whose command stream (rctx->cs) is written.
+ * state:                 constant buffer state to emit; its dirty_mask is
+ *                        cleared before returning.
+ * buffer_id_base:        added to the slot index to form the resource id.
+ * reg_alu_constbuf_size: size register of slot 0; slot N uses + N * 4.
+ * reg_alu_const_cache:   cache register of slot 0; slot N uses + N * 4.
+ */
+static void r600_emit_constant_buffers(struct r600_context *rctx,
+                                      struct r600_constbuf_state *state,
+                                      unsigned buffer_id_base,
+                                      unsigned reg_alu_constbuf_size,
+                                      unsigned reg_alu_const_cache)
+{
+       struct radeon_winsys_cs *cs = rctx->cs;
+       uint32_t dirty_mask = state->dirty_mask;
+
+       while (dirty_mask) {
+               struct r600_constant_buffer *cb;
+               struct r600_resource *rbuffer;
+               unsigned offset;
+               /* ffs() is 1-based, so this is the lowest dirty slot. */
+               unsigned buffer_index = ffs(dirty_mask) - 1;
+
+               cb = &state->cb[buffer_index];
+               rbuffer = (struct r600_resource*)cb->buffer;
+               /* A dirty slot must always have a buffer bound. */
+               assert(rbuffer);
+
+               offset = cb->buffer_offset;
+
+               r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
+                                      ALIGN_DIVUP(cb->buffer_size >> 4, 16));
+               /* The cache register takes the offset shifted right by 8. */
+               r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
+
+               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+               r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+
+               r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
+               r600_write_value(cs, (buffer_id_base + buffer_index) * 7);
+               r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
+               r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+               r600_write_value(cs, /* RESOURCEi_WORD2 */
+                                S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
+                                S_038008_STRIDE(16));
+               r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
+               r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+               r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+               r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */
+
+               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+               r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+
+               /* Unsigned constant: shifting a signed 1 into bit 31 is
+                * undefined behaviour, and the mask is 32 bits wide. */
+               dirty_mask &= ~(1u << buffer_index);
+       }
+       state->dirty_mask = 0;
+}
+
+/* Atom emit callback for the vertex shader constant buffers: emits
+ * rctx->vs_constbuf_state with resource id base 160 and the VS slot-0
+ * size/cache registers.  The atom argument is not used.
+ */
+static void r600_emit_vs_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+       struct r600_constbuf_state *vs_state = &rctx->vs_constbuf_state;
+
+       r600_emit_constant_buffers(rctx, vs_state, 160,
+                                  R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+                                  R_028980_ALU_CONST_CACHE_VS_0);
+}
+
+/* Atom emit callback for the fragment shader constant buffers: emits
+ * rctx->ps_constbuf_state with resource id base 0 and the PS slot-0
+ * size/cache registers.  The atom argument is not used.
+ */
+static void r600_emit_ps_constant_buffer(struct r600_context *rctx, struct r600_atom *atom)
+{
+       struct r600_constbuf_state *ps_state = &rctx->ps_constbuf_state;
+
+       r600_emit_constant_buffers(rctx, ps_state, 0,
+                                  R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+                                  R_028940_ALU_CONST_CACHE_PS_0);
+}
+
 void r600_init_state_functions(struct r600_context *rctx)
 {
        r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, 0);
        r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
        r600_init_atom(&rctx->vertex_buffer_state, r600_emit_vertex_buffers, 0, 0);
+       r600_init_atom(&rctx->vs_constbuf_state.atom, r600_emit_vs_constant_buffer, 0, 0);
+       r600_init_atom(&rctx->ps_constbuf_state.atom, r600_emit_ps_constant_buffer, 0, 0);
 
        rctx->context.create_blend_state = r600_create_blend_state;
        rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
index 61f59f7..3c93f49 100644 (file)
@@ -519,87 +519,54 @@ static void r600_update_alpha_ref(struct r600_context *rctx)
        rctx->alpha_ref_dirty = false;
 }
 
+/* Recompute the size of a constant buffer atom from the current dirty_mask
+ * and mark the atom dirty.
+ *
+ * Every dirty buffer is budgeted 20 dwords on EVERGREEN and newer chip
+ * classes and 19 dwords on older ones.
+ */
+void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
+{
+       unsigned dw_per_buffer = 19;
+
+       if (rctx->chip_class >= EVERGREEN) {
+               dw_per_buffer = 20;
+       }
+
+       state->atom.num_dw = util_bitcount(state->dirty_mask) * dw_per_buffer;
+       r600_atom_dirty(rctx, &state->atom);
+}
+
 void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                              struct pipe_resource *buffer)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_resource *rbuffer = r600_resource(buffer);
-       struct r600_pipe_resource_state *rstate;
-       uint64_t va_offset;
+       struct r600_constbuf_state *state;
+       struct r600_constant_buffer *cb;
        uint32_t offset;
 
+       switch (shader) {
+       case PIPE_SHADER_VERTEX:
+               state = &rctx->vs_constbuf_state;
+               break;
+       case PIPE_SHADER_FRAGMENT:
+               state = &rctx->ps_constbuf_state;
+               break;
+       default:
+               return;
+       }
+
        /* Note that the state tracker can unbind constant buffers by
         * passing NULL here.
         */
        if (buffer == NULL) {
+               state->enabled_mask &= ~(1 << index);
+               state->dirty_mask &= ~(1 << index);
+               pipe_resource_reference(&state->cb[index].buffer, NULL);
                return;
        }
 
        r600_inval_shader_cache(rctx);
-
        r600_upload_const_buffer(rctx, &rbuffer, &offset);
-       va_offset = r600_resource_va(ctx->screen, (void*)rbuffer);
-       va_offset += offset;
-       va_offset >>= 8;
 
-       switch (shader) {
-       case PIPE_SHADER_VERTEX:
-               rctx->vs_const_buffer.nregs = 0;
-               r600_pipe_state_add_reg(&rctx->vs_const_buffer,
-                                       R_028180_ALU_CONST_BUFFER_SIZE_VS_0 + index * 4,
-                                       ALIGN_DIVUP(buffer->width0 >> 4, 16),
-                                       NULL, 0);
-               r600_pipe_state_add_reg(&rctx->vs_const_buffer,
-                                       R_028980_ALU_CONST_CACHE_VS_0 + index * 4,
-                                       va_offset, rbuffer, RADEON_USAGE_READ);
-               r600_context_pipe_state_set(rctx, &rctx->vs_const_buffer);
-
-               rstate = &rctx->vs_const_buffer_resource[index];
-               if (!rstate->id) {
-                       if (rctx->chip_class >= EVERGREEN) {
-                               evergreen_pipe_init_buffer_resource(rctx, rstate);
-                       } else {
-                               r600_pipe_init_buffer_resource(rctx, rstate);
-                       }
-               }
+       cb = &state->cb[index];
+       pipe_resource_reference(&cb->buffer, &rbuffer->b.b.b);
+       cb->buffer_offset = offset;
+       cb->buffer_size = buffer->width0;
 
-               if (rctx->chip_class >= EVERGREEN) {
-                       evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
-               } else {
-                       r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
-               }
-               r600_context_pipe_state_set_vs_resource(rctx, rstate, index);
-               break;
-       case PIPE_SHADER_FRAGMENT:
-               rctx->ps_const_buffer.nregs = 0;
-               r600_pipe_state_add_reg(&rctx->ps_const_buffer,
-                                       R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
-                                       ALIGN_DIVUP(buffer->width0 >> 4, 16),
-                                       NULL, 0);
-               r600_pipe_state_add_reg(&rctx->ps_const_buffer,
-                                       R_028940_ALU_CONST_CACHE_PS_0,
-                                       va_offset, rbuffer, RADEON_USAGE_READ);
-               r600_context_pipe_state_set(rctx, &rctx->ps_const_buffer);
-
-               rstate = &rctx->ps_const_buffer_resource[index];
-               if (!rstate->id) {
-                       if (rctx->chip_class >= EVERGREEN) {
-                               evergreen_pipe_init_buffer_resource(rctx, rstate);
-                       } else {
-                               r600_pipe_init_buffer_resource(rctx, rstate);
-                       }
-               }
-               if (rctx->chip_class >= EVERGREEN) {
-                       evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
-               } else {
-                       r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
-               }
-               r600_context_pipe_state_set_ps_resource(rctx, rstate, index);
-               break;
-       default:
-               R600_ERR("unsupported %d\n", shader);
-               return;
-       }
+       state->enabled_mask |= 1 << index;
+       state->dirty_mask |= 1 << index;
+       r600_constant_buffers_dirty(rctx, state);
 
        if (buffer != &rbuffer->b.b.b)
                pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);