From 8a97528b3a97a430a887e9044b938b349585f4ab Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 28 Aug 2015 21:48:37 +0200 Subject: [PATCH] radeonsi: optimize viewport states MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit same as scissors Reviewed-by: Alex Deucher Acked-by: Christian König --- src/gallium/drivers/radeonsi/si_blit.c | 4 +- src/gallium/drivers/radeonsi/si_hw_context.c | 2 + src/gallium/drivers/radeonsi/si_pipe.h | 8 ++++ src/gallium/drivers/radeonsi/si_state.c | 58 +++++++++++++++++-------- src/gallium/drivers/radeonsi/si_state.h | 6 --- src/gallium/drivers/radeonsi/si_state_shaders.c | 2 + 6 files changed, 54 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index b2f342f..c28b2a8 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -65,9 +65,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) util_blitter_save_sample_mask(sctx->blitter, sctx->queued.named.sample_mask->sample_mask); } - if (sctx->queued.named.viewport[0]) { - util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport); - } + util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]); util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]); util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer); util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets, diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 873a472..8284306 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -195,7 +195,9 @@ void si_begin_new_cs(struct si_context *ctx) si_all_descriptors_begin_new_cs(ctx); ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; si_mark_atom_dirty(ctx, &ctx->scissors.atom); + si_mark_atom_dirty(ctx, &ctx->viewports.atom); r600_postflush_resume_features(&ctx->b); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 9060f94..a1845ba 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -135,6 +135,12 @@ struct si_scissors { struct pipe_scissor_state states[SI_MAX_VIEWPORTS]; }; +struct si_viewports { + struct r600_atom atom; + unsigned dirty_mask; + struct pipe_viewport_state states[SI_MAX_VIEWPORTS]; +}; + #define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0])) struct si_context { @@ -163,6 +169,7 @@ struct si_context { struct r600_atom *clip_regs; struct r600_atom *shader_userdata; struct r600_atom *scissors; + struct r600_atom *viewports; } s; struct r600_atom *array[0]; } atoms; @@ -191,6 +198,7 @@ struct si_context { unsigned border_color_offset; struct si_scissors scissors; + struct si_viewports viewports; struct r600_atom clip_regs; struct r600_atom msaa_sample_locs; struct r600_atom msaa_config; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 8bd35a8..940aaa0 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -578,29 +578,52 @@ static void si_set_viewport_states(struct pipe_context *ctx, const struct pipe_viewport_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_state_viewport *viewport; - struct si_pm4_state *pm4; int i; - for (i = start_slot; i < start_slot + num_viewports; i++) { - int idx = i - start_slot; - int offset = i * 4 * 6; + for (i = 0; i < num_viewports; i++) + sctx->viewports.states[start_slot + i] = state[i]; - viewport = CALLOC_STRUCT(si_state_viewport); - if (!viewport) - return; - pm4 = &viewport->pm4; + sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot; + si_mark_atom_dirty(sctx, &sctx->viewports.atom); +} + +static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + struct pipe_viewport_state *states = sctx->viewports.states; + unsigned mask = sctx->viewports.dirty_mask; + + /* The simple case: Only 1 viewport is active. */ + if (mask & 1 && + !si_get_vs_info(sctx)->writes_viewport_index) { + r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6); + radeon_emit(cs, fui(states[0].scale[0])); + radeon_emit(cs, fui(states[0].translate[0])); + radeon_emit(cs, fui(states[0].scale[1])); + radeon_emit(cs, fui(states[0].translate[1])); + radeon_emit(cs, fui(states[0].scale[2])); + radeon_emit(cs, fui(states[0].translate[2])); + sctx->viewports.dirty_mask &= ~1; /* clear one bit */ + return; + } - viewport->viewport = state[idx]; - si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0])); - si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0])); - si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1])); - si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1])); - si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2])); - si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2])); + while (mask) { + int start, count, i; - si_pm4_set_state(sctx, viewport[i], viewport); + u_bit_scan_consecutive_range(&mask, &start, &count); + + r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + + start * 4 * 6, count * 6); + for (i = start; i < start+count; i++) { + radeon_emit(cs, fui(states[i].scale[0])); + radeon_emit(cs, fui(states[i].translate[0])); + radeon_emit(cs, fui(states[i].scale[1])); + radeon_emit(cs, fui(states[i].translate[1])); + radeon_emit(cs, fui(states[i].scale[2])); + radeon_emit(cs, fui(states[i].translate[2])); + } } + sctx->viewports.dirty_mask = 0; } /* @@ -3011,6 +3034,7 @@ void si_init_state_functions(struct si_context *sctx) si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10); si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6); si_init_atom(&sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4); + si_init_atom(&sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8); sctx->b.b.create_blend_state = si_create_blend_state; sctx->b.b.bind_blend_state = si_bind_blend_state; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 34dbba4..3214783 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -48,11 +48,6 @@ struct si_state_sample_mask { uint16_t sample_mask; }; -struct si_state_viewport { - struct si_pm4_state pm4; - struct pipe_viewport_state viewport; -}; - struct si_state_rasterizer { struct si_pm4_state pm4; bool flatshade; @@ -91,7 +86,6 @@ union si_state { struct si_pm4_state *blend_color; struct si_pm4_state *clip; struct si_state_sample_mask *sample_mask; - struct si_state_viewport *viewport[16]; struct si_state_rasterizer *rasterizer; struct si_state_dsa *dsa; struct si_pm4_state *fb_rs; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 5a9ef29..4ca9aa5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -775,6 +775,8 @@ static void si_update_viewports_and_scissors(struct si_context *sctx) if (sctx->scissors.dirty_mask) si_mark_atom_dirty(sctx, &sctx->scissors.atom); + if (sctx->viewports.dirty_mask) + si_mark_atom_dirty(sctx, &sctx->viewports.atom); } static void si_bind_vs_shader(struct pipe_context *ctx, void *state) -- 2.7.4