From 9e182b8313c5ab952498a76495f57e8420f9e5ad Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 28 Sep 2018 18:49:29 -0400 Subject: [PATCH] radeonsi: center viewport to improve guardband clipping for high resolutions This will be more useful when we change the quant mode to increase subpixel precision and decrease the viewport range (which might not be possible if the viewport is not centered in the viewport range). --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 + src/gallium/drivers/radeonsi/si_state.c | 11 ++++- src/gallium/drivers/radeonsi/si_state.h | 2 + src/gallium/drivers/radeonsi/si_state_viewport.c | 62 +++++++++++++++++++----- 4 files changed, 62 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 3ddd786..84f5e4c 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -348,6 +348,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000; + ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0x00000000; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index a170d52..babd171 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2730,6 +2730,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, bool unbound = false; int i; + /* Reject zero-sized framebuffers due to a hw bug on SI that occurs + * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and 
any_scissor.BR_X/Y <= 0. + * We could implement the full workaround here, but it's a useless case. + */ + if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { + unreachable("the framebuffer shouldn't have zero area"); + return; + } + si_update_fb_dirtiness_after_rendering(sctx); for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { @@ -4879,8 +4888,6 @@ static void si_init_config(struct si_context *sctx) S_028230_ER_LINE_RL(0x26) | S_028230_ER_LINE_TB(0xA) | S_028230_ER_LINE_BT(0xA)); - /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ - si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fffc636..173e210 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -275,6 +275,8 @@ enum si_tracked_reg { SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ, SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ, + SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, + SI_TRACKED_PA_SC_CLIPRECT_RULE, SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 8dc68b1..335d63b 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -126,6 +126,18 @@ static void si_emit_one_scissor(struct si_context *ctx, if (scissor) si_clip_scissor(&final, scissor); + /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_- + * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 
+ */ + if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) { + radeon_emit(cs, S_028250_TL_X(1) | + S_028250_TL_Y(1) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(1) | + S_028254_BR_Y(1)); + return; + } + radeon_emit(cs, S_028250_TL_X(final.minx) | S_028250_TL_Y(final.miny) | S_028250_WINDOW_OFFSET_DISABLE(1)); @@ -138,8 +150,7 @@ static void si_emit_one_scissor(struct si_context *ctx, static void si_emit_guardband(struct si_context *ctx) { - const struct si_signed_scissor *vp_as_scissor; - struct si_signed_scissor max_vp_scissor; + struct si_signed_scissor vp_as_scissor; struct pipe_viewport_state vp; float left, top, right, bottom, max_range, guardband_x, guardband_y; float discard_x, discard_y; @@ -147,26 +158,49 @@ static void si_emit_guardband(struct si_context *ctx) if (ctx->vs_writes_viewport_index) { /* Shaders can draw to any viewport. Make a union of all * viewports. */ - max_vp_scissor = ctx->viewports.as_scissor[0]; + vp_as_scissor = ctx->viewports.as_scissor[0]; for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) { - si_scissor_make_union(&max_vp_scissor, + si_scissor_make_union(&vp_as_scissor, &ctx->viewports.as_scissor[i]); } - vp_as_scissor = &max_vp_scissor; } else { - vp_as_scissor = &ctx->viewports.as_scissor[0]; + vp_as_scissor = ctx->viewports.as_scissor[0]; } + /* Determine the optimal hardware screen offset to center the viewport + * within the viewport range in order to maximize the guardband size. + */ + int hw_screen_offset_x = (vp_as_scissor.maxx - vp_as_scissor.minx) / 2; + int hw_screen_offset_y = (vp_as_scissor.maxy - vp_as_scissor.miny) / 2; + + const unsigned hw_screen_offset_max = 8176; + /* SI-CI need to align the offset to an ubertile consisting of all SEs. */ + const unsigned hw_screen_offset_alignment = + ctx->chip_class >= VI ? 
16 : MAX2(ctx->screen->se_tile_repeat, 16); + + hw_screen_offset_x = MIN2(hw_screen_offset_x, hw_screen_offset_max); + hw_screen_offset_y = MIN2(hw_screen_offset_y, hw_screen_offset_max); + + /* Align the screen offset down to hw_screen_offset_alignment by dropping the low bits. */ + hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1); + hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1); + + /* Apply the offset to center the viewport and maximize the guardband. */ + vp_as_scissor.minx -= hw_screen_offset_x; + vp_as_scissor.maxx -= hw_screen_offset_x; + vp_as_scissor.miny -= hw_screen_offset_y; + vp_as_scissor.maxy -= hw_screen_offset_y; + /* Reconstruct the viewport transformation from the scissor. */ - vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0; - vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0; - vp.scale[0] = vp_as_scissor->maxx - vp.translate[0]; - vp.scale[1] = vp_as_scissor->maxy - vp.translate[1]; + vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0; + vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0; + vp.scale[0] = vp_as_scissor.maxx - vp.translate[0]; + vp.scale[1] = vp_as_scissor.maxy - vp.translate[1]; /* Treat a 0x0 viewport as 1x1 to prevent division by zero. 
*/ - if (vp_as_scissor->minx == vp_as_scissor->maxx) + if (vp_as_scissor.minx == vp_as_scissor.maxx) vp.scale[0] = 0.5; - if (vp_as_scissor->miny == vp_as_scissor->maxy) + if (vp_as_scissor.miny == vp_as_scissor.maxy) vp.scale[1] = 0.5; /* Find the biggest guard band that is inside the supported viewport @@ -221,6 +255,10 @@ static void si_emit_guardband(struct si_context *ctx) SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y), fui(guardband_x), fui(discard_x)); + radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, + SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, + S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) | + S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4)); } static void si_emit_scissors(struct si_context *ctx) -- 2.7.4