From f6a10f60b75821c20ce7cf338b519b92ed0330fc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 28 Aug 2015 21:08:49 +0200 Subject: [PATCH] radeonsi: optimize scissor states MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit - convert 16 states to 1 atom - only emit 1 scissor if VIEWPORT_INDEX isn't written - use only one packet when emitting consecutive scissors Reviewed-by: Alex Deucher Acked-by: Christian König --- src/gallium/drivers/radeonsi/si_blit.c | 4 +- src/gallium/drivers/radeonsi/si_hw_context.c | 3 ++ src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 10 +++++ src/gallium/drivers/radeonsi/si_shader.h | 4 +- src/gallium/drivers/radeonsi/si_state.c | 57 ++++++++++++++++++------- src/gallium/drivers/radeonsi/si_state.h | 6 --- src/gallium/drivers/radeonsi/si_state_shaders.c | 20 +++++++++ 8 files changed, 79 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index b7450b6..b2f342f 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -68,9 +68,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) if (sctx->queued.named.viewport[0]) { util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport); } - if (sctx->queued.named.scissor[0]) { - util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor); - } + util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]); util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer); util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets, (struct pipe_stream_output_target**)sctx->b.streamout.targets); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 563251d..873a472 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ 
b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -194,6 +194,9 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); si_all_descriptors_begin_new_cs(ctx); + ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + si_mark_atom_dirty(ctx, &ctx->scissors.atom); + r600_postflush_resume_features(&ctx->b); ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 92c6ae3..330b946 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -369,7 +369,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 8; case PIPE_CAP_MAX_VIEWPORTS: - return 16; + return SI_MAX_VIEWPORTS; /* Timer queries, present when the clock frequency is non zero. */ case PIPE_CAP_QUERY_TIMESTAMP: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 52167f2..9060f94 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -85,6 +85,8 @@ #define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000) #define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) +#define SI_MAX_VIEWPORTS 16 + struct si_compute; struct si_screen { @@ -127,6 +129,12 @@ struct si_framebuffer { unsigned export_16bpc; }; +struct si_scissors { + struct r600_atom atom; + unsigned dirty_mask; + struct pipe_scissor_state states[SI_MAX_VIEWPORTS]; +}; + #define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0])) struct si_context { @@ -154,6 +162,7 @@ struct si_context { struct r600_atom *msaa_config; struct r600_atom *clip_regs; struct r600_atom *shader_userdata; + struct r600_atom *scissors; } s; struct r600_atom *array[0]; } atoms; @@ -181,6 +190,7 @@ struct si_context { struct r600_resource *border_color_table; unsigned border_color_offset; + struct si_scissors scissors; struct r600_atom clip_regs; struct 
r600_atom msaa_sample_locs; struct r600_atom msaa_config; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index ad32473..c748f71 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -278,8 +278,10 @@ static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) return &sctx->gs_shader->info; else if (sctx->tes_shader) return &sctx->tes_shader->info; - else + else if (sctx->vs_shader) return &sctx->vs_shader->info; + else + return NULL; } static inline struct si_shader* si_get_vs_state(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 62eda97..8bd35a8 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -526,26 +526,50 @@ static void si_set_scissor_states(struct pipe_context *ctx, const struct pipe_scissor_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_state_scissor *scissor; - struct si_pm4_state *pm4; int i; - for (i = start_slot; i < start_slot + num_scissors; i++) { - int idx = i - start_slot; - int offset = i * 4 * 2; + for (i = 0; i < num_scissors; i++) + sctx->scissors.states[start_slot + i] = state[i]; - scissor = CALLOC_STRUCT(si_state_scissor); - if (scissor == NULL) - return; - pm4 = &scissor->pm4; - scissor->scissor = state[idx]; - si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, - S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) | - S_028250_WINDOW_OFFSET_DISABLE(1)); - si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset, - S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy)); - si_pm4_set_state(sctx, scissor[i], scissor); + sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot; + si_mark_atom_dirty(sctx, &sctx->scissors.atom); +} + +static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom) +{ + 
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + struct pipe_scissor_state *states = sctx->scissors.states; + unsigned mask = sctx->scissors.dirty_mask; + struct tgsi_shader_info *info = si_get_vs_info(sctx); /* may be NULL: si_get_vs_info() returns NULL when no vertex-stage shader is bound */ + + /* The simple case: Only 1 viewport is active. */ + if (mask & 1 && (!info || !info->writes_viewport_index)) { + r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); + radeon_emit(cs, S_028250_TL_X(states[0].minx) | + S_028250_TL_Y(states[0].miny) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(states[0].maxx) | + S_028254_BR_Y(states[0].maxy)); + sctx->scissors.dirty_mask &= ~1; /* clear one bit */ + return; + } + + while (mask) { + int start, count, i; + + u_bit_scan_consecutive_range(&mask, &start, &count); + + r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + + start * 4 * 2, count * 2); + for (i = start; i < start+count; i++) { + radeon_emit(cs, S_028250_TL_X(states[i].minx) | + S_028250_TL_Y(states[i].miny) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(states[i].maxx) | + S_028254_BR_Y(states[i].maxy)); + } } + sctx->scissors.dirty_mask = 0; } static void si_set_viewport_states(struct pipe_context *ctx, @@ -2986,6 +3010,7 @@ void si_init_state_functions(struct si_context *sctx) si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0); si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10); si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6); + si_init_atom(&sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4); sctx->b.b.create_blend_state = si_create_blend_state; sctx->b.b.bind_blend_state = si_bind_blend_state; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index bf713c4..34dbba4 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -48,11 +48,6 @@ struct si_state_sample_mask { uint16_t 
sample_mask; }; -struct si_state_scissor { - struct si_pm4_state pm4; - struct pipe_scissor_state scissor; -}; - struct si_state_viewport { struct si_pm4_state pm4; struct pipe_viewport_state viewport; @@ -96,7 +91,6 @@ union si_state { struct si_pm4_state *blend_color; struct si_pm4_state *clip; struct si_state_sample_mask *sample_mask; - struct si_state_scissor *scissor[16]; struct si_state_viewport *viewport[16]; struct si_state_rasterizer *rasterizer; struct si_state_dsa *dsa; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b223e06..5a9ef29 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -760,6 +760,23 @@ static void *si_create_tes_state(struct pipe_context *ctx, return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL); } +/** + * Normally, we only emit 1 viewport and 1 scissor if no shader is using + * the VIEWPORT_INDEX output, and emitting the other viewports and scissors + * is delayed. When a shader with VIEWPORT_INDEX appears, this should be + * called to emit the rest. 
+ */ +static void si_update_viewports_and_scissors(struct si_context *sctx) +{ + struct tgsi_shader_info *info = si_get_vs_info(sctx); + + if (!info || !info->writes_viewport_index) + return; + + if (sctx->scissors.dirty_mask) + si_mark_atom_dirty(sctx, &sctx->scissors.atom); +} + static void si_bind_vs_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; @@ -770,6 +787,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) sctx->vs_shader = sel; si_mark_atom_dirty(sctx, &sctx->clip_regs); + si_update_viewports_and_scissors(sctx); } static void si_bind_gs_shader(struct pipe_context *ctx, void *state) @@ -787,6 +805,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) if (enable_changed) si_shader_change_notify(sctx); + si_update_viewports_and_scissors(sctx); } static void si_bind_tcs_shader(struct pipe_context *ctx, void *state) @@ -821,6 +840,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_shader_change_notify(sctx); sctx->last_tes_sh_base = -1; /* invalidate derived tess state */ } + si_update_viewports_and_scissors(sctx); } static void si_make_dummy_ps(struct si_context *sctx) -- 2.7.4