From 1a24f443b492972eec8f01ffb36d0ae300acd7c8 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 10 Dec 2015 01:37:39 +0100
Subject: [PATCH] radeonsi: implement fast stencil clear

Reviewed-by: Alex Deucher
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 ++
 src/gallium/drivers/radeonsi/si_blit.c        | 46 ++++++++++++++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.h        |  2 ++
 src/gallium/drivers/radeonsi/si_state.c       | 26 ++++++++-------
 4 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index dd23ed5..8fbbe88 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -222,6 +222,8 @@ struct r600_texture {
 	struct r600_resource *htile_buffer;
 	bool depth_cleared; /* if it was cleared at least once */
 	float depth_clear_value;
+	bool stencil_cleared; /* if it was cleared at least once */
+	uint8_t stencil_clear_value;
 
 	bool non_disp_tiling; /* R600-Cayman only */
 };
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 13d8e6f..75a9d56 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -377,22 +377,39 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		}
 	}
 
-	if (buffers & PIPE_CLEAR_DEPTH &&
-	    zstex && zstex->htile_buffer &&
+	if (zstex && zstex->htile_buffer &&
 	    zsbuf->u.tex.level == 0 &&
 	    zsbuf->u.tex.first_layer == 0 &&
 	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
-		/* Need to disable EXPCLEAR temporarily if clearing
-		 * to a new value. */
-		if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
-			sctx->db_depth_disable_expclear = true;
+		if (buffers & PIPE_CLEAR_DEPTH) {
+			/* Need to disable EXPCLEAR temporarily if clearing
+			 * to a new value. */
+			if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
+				sctx->db_depth_disable_expclear = true;
+			}
+
+			zstex->depth_clear_value = depth;
+			sctx->framebuffer.dirty_zsbuf = true;
+			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
+			sctx->db_depth_clear = true;
+			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 		}
 
-		zstex->depth_clear_value = depth;
-		sctx->framebuffer.dirty_zsbuf = true;
-		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
-		sctx->db_depth_clear = true;
-		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		if (buffers & PIPE_CLEAR_STENCIL) {
+			stencil &= 0xff;
+
+			/* Need to disable EXPCLEAR temporarily if clearing
+			 * to a new value. */
+			if (zstex->stencil_cleared && zstex->stencil_clear_value != stencil) {
+				sctx->db_stencil_disable_expclear = true;
+			}
+
+			zstex->stencil_clear_value = stencil;
+			sctx->framebuffer.dirty_zsbuf = true;
+			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
+			sctx->db_stencil_clear = true;
+			si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		}
 	}
 
 	si_blitter_begin(ctx, SI_CLEAR);
@@ -407,6 +424,13 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		zstex->depth_cleared = true;
 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
+
+	if (sctx->db_stencil_clear) {
+		sctx->db_stencil_clear = false;
+		sctx->db_stencil_disable_expclear = false;
+		zstex->stencil_cleared = true;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	}
 }
 
 static void si_clear_render_target(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 834c358..65c7e19 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -253,6 +253,8 @@ struct si_context {
 	bool db_flush_stencil_inplace;
 	bool db_depth_clear;
 	bool db_depth_disable_expclear;
+	bool db_stencil_clear;
+	bool db_stencil_disable_expclear;
 	unsigned ps_db_shader_control;
 
 	/* Emitted draw state. */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index c2d4d44..4086819 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1090,10 +1090,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 		radeon_emit(cs,
 			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
 			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
-	} else if (sctx->db_depth_clear) {
-		radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
 	} else {
-		radeon_emit(cs, 0);
+		radeon_emit(cs,
+			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
+			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
 	}
 
 	/* DB_COUNT_CONTROL (occlusion queries) */
@@ -1120,12 +1120,9 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	}
 
 	/* DB_RENDER_OVERRIDE2 */
-	if (sctx->db_depth_disable_expclear) {
-		radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
-			S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
-	} else {
-		radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
-	}
+	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
+		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear));
 
 	db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
 			    sctx->ps_db_shader_control;
@@ -2217,7 +2214,10 @@ static void si_init_depth_surface(struct si_context *sctx,
 		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
 			  S_028040_ALLOW_EXPCLEAR(1);
 
-		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+			s_info |= S_028044_ALLOW_EXPCLEAR(1);
+		else
+			/* Use all of the htile_buffer for depth if there's no stencil. */
 			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
 
 		uint64_t va = rtex->htile_buffer->gpu_address;
@@ -2486,8 +2486,11 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
 		radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
 
+		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
+		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
+		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
+
 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
-		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
 		radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
 				       zb->pa_su_poly_offset_db_fmt_cntl);
 	} else if (sctx->framebuffer.dirty_zsbuf) {
@@ -3578,7 +3581,6 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
-	si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
 	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
 	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
 	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
-- 
2.7.4