From 384213cb5122361f1b4561b3d591e8c8fbef26ab Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 7 Dec 2014 15:52:15 +0100 Subject: [PATCH] radeonsi: emit draw packets directly into the CS MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_state.h | 1 - src/gallium/drivers/radeonsi/si_state_draw.c | 160 ++++++++++++++++----------- 3 files changed, 95 insertions(+), 68 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 5f5404d..5867e73 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -37,7 +37,7 @@ #define SI_TRACE_CS 0 #define SI_TRACE_CS_DWORDS 6 -#define SI_MAX_DRAW_CS_DWORDS 18 +#define SI_MAX_DRAW_CS_DWORDS 31 struct si_compute; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index b8d8040..be2e858 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -109,7 +109,6 @@ union si_state { struct si_pm4_state *ps; struct si_pm4_state *spi; struct si_pm4_state *draw_info; - struct si_pm4_state *draw; } named; struct si_pm4_state *array[0]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 674b4a0..4ed3847 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -708,16 +708,13 @@ static void si_update_derived_state(struct si_context *sctx) } } -static void si_state_draw(struct si_context *sctx, - const struct pipe_draw_info *info, - const struct pipe_index_buffer *ib) +static void si_emit_draw_packets(struct si_context *sctx, + const struct pipe_draw_info *info, + const struct pipe_index_buffer *ib) { + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; unsigned sh_base_reg = (sctx->gs_shader ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0); - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - - if (pm4 == NULL) - return; if (info->count_from_stream_output) { struct r600_so_target *t = @@ -725,85 +722,116 @@ static void si_state_draw(struct si_context *sctx, uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset; - si_pm4_set_reg(pm4, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, - t->stride_in_dw); - - si_pm4_cmd_begin(pm4, PKT3_COPY_DATA); - si_pm4_cmd_add(pm4, - COPY_DATA_SRC_SEL(COPY_DATA_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_WR_CONFIRM); - si_pm4_cmd_add(pm4, va); /* src address lo */ - si_pm4_cmd_add(pm4, va >> 32UL); /* src address hi */ - si_pm4_cmd_add(pm4, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); - si_pm4_cmd_add(pm4, 0); /* unused */ - si_pm4_add_bo(pm4, t->buf_filled_size, RADEON_USAGE_READ, - RADEON_PRIO_MIN); - si_pm4_cmd_end(pm4, true); + r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, + t->stride_in_dw); + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, va); /* src address lo */ + radeon_emit(cs, va >> 32); /* src address hi */ + radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); + radeon_emit(cs, 0); /* unused */ + + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, + t->buf_filled_size, RADEON_USAGE_READ, + RADEON_PRIO_MIN); } /* draw packet */ - si_pm4_cmd_begin(pm4, PKT3_INDEX_TYPE); - if (ib->index_size == 4) { - si_pm4_cmd_add(pm4, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ? - V_028A7C_VGT_DMA_SWAP_32_BIT : 0)); - } else { - si_pm4_cmd_add(pm4, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ? - V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); + if (info->indexed) { + radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); + + if (ib->index_size == 4) { + radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ? + V_028A7C_VGT_DMA_SWAP_32_BIT : 0)); + } else { + radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ? + V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); + } } - si_pm4_cmd_end(pm4, sctx->b.predicate_drawing); if (!info->indirect) { - si_pm4_cmd_begin(pm4, PKT3_NUM_INSTANCES); - si_pm4_cmd_add(pm4, info->instance_count); - si_pm4_cmd_end(pm4, sctx->b.predicate_drawing); - - si_pm4_set_reg(pm4, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, - info->indexed ? info->index_bias : info->start); - si_pm4_set_reg(pm4, sh_base_reg + SI_SGPR_START_INSTANCE * 4, - info->start_instance); + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); + radeon_emit(cs, info->instance_count); + + si_write_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2); + radeon_emit(cs, info->indexed ? info->index_bias : info->start); + radeon_emit(cs, info->start_instance); } else { - si_pm4_add_bo(pm4, (struct r600_resource *)info->indirect, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, + (struct r600_resource *)info->indirect, + RADEON_USAGE_READ, RADEON_PRIO_MIN); } if (info->indexed) { - uint32_t max_size = (ib->buffer->width0 - ib->offset) / - ib->index_size; - uint64_t va = r600_resource(ib->buffer)->gpu_address + ib->offset; + uint32_t index_max_size = (ib->buffer->width0 - ib->offset) / + ib->index_size; + uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset; - si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, + (struct r600_resource *)ib->buffer, + RADEON_USAGE_READ, RADEON_PRIO_MIN); if (info->indirect) { uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; - si_cmd_draw_index_indirect(pm4, indirect_va, va, max_size, - info->indirect_offset, - sh_base_reg + SI_SGPR_BASE_VERTEX * 4, - sh_base_reg + SI_SGPR_START_INSTANCE * 4, - sctx->b.predicate_drawing); + + assert(indirect_va % 8 == 0); + assert(index_va % 2 == 0); + assert(info->indirect_offset % 4 == 0); + + radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); + radeon_emit(cs, 1); + radeon_emit(cs, indirect_va); + radeon_emit(cs, indirect_va >> 32); + + radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); + radeon_emit(cs, index_va); + radeon_emit(cs, index_va >> 32); + + radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); + radeon_emit(cs, index_max_size); + + radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, sctx->b.predicate_drawing)); + radeon_emit(cs, info->indirect_offset); + radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); } else { - va += info->start * ib->index_size; - si_cmd_draw_index_2(pm4, max_size, va, info->count, - V_0287F0_DI_SRC_SEL_DMA, - sctx->b.predicate_drawing); + index_va += info->start * ib->index_size; + + radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, sctx->b.predicate_drawing)); + radeon_emit(cs, index_max_size); + radeon_emit(cs, index_va); + radeon_emit(cs, (index_va >> 32UL) & 0xFF); + radeon_emit(cs, info->count); + radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); } } else { if (info->indirect) { uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; - si_cmd_draw_indirect(pm4, indirect_va, info->indirect_offset, - sh_base_reg + SI_SGPR_BASE_VERTEX * 4, - sh_base_reg + SI_SGPR_START_INSTANCE * 4, - sctx->b.predicate_drawing); + + assert(indirect_va % 8 == 0); + assert(info->indirect_offset % 4 == 0); + + radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); + radeon_emit(cs, 1); + radeon_emit(cs, indirect_va); + radeon_emit(cs, indirect_va >> 32); + + radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, sctx->b.predicate_drawing)); + radeon_emit(cs, info->indirect_offset); + radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX); } else { - si_cmd_draw_index_auto(pm4, info->count, - V_0287F0_DI_SRC_SEL_AUTO_INDEX | - S_0287F0_USE_OPAQUE(!!info->count_from_stream_output), - sctx->b.predicate_drawing); + radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, sctx->b.predicate_drawing)); + radeon_emit(cs, info->count); + radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | + S_0287F0_USE_OPAQUE(!!info->count_from_stream_output)); } } - - si_pm4_set_state(sctx, draw, pm4); } void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom) @@ -988,8 +1016,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (!si_update_draw_info_state(sctx, info, &ib)) return; - si_state_draw(sctx, info, &ib); - sctx->pm4_dirty_cdwords += si_pm4_dirty_dw(sctx); /* Check flush flags. */ @@ -1009,6 +1035,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_pm4_emit_dirty(sctx); sctx->pm4_dirty_cdwords = 0; + si_emit_draw_packets(sctx, info, &ib); + #if SI_TRACE_CS if (sctx->screen->b.trace_bo) { si_trace_emit(sctx); -- 2.7.4