From 903056e016e3ea52c2f493f8b0938b519ee40894 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Nov 2016 22:59:45 -0800 Subject: [PATCH] i965: Use 3DSTATE_CLIP's User Clip Distance Enable bitmask on Gen8+. Gen6-7.5 specify the user clip distance enable bitmask in 3DSTATE_CLIP. Gen8+ normally uses the new internal signalling mechanism to select the one specified in the last enabled shader stage (3DSTATE_VS, DS, or GS). This is a pretty good fit for Vulkan, or even newer GL, where the bitmask comes entirely from the shader. But with glClipPlane(), this is dynamic state, and we have to listen to _NEW_TRANSFORM. Clip plane enables are the only reason the VS/DS/GS atoms need to listen to _NEW_TRANSFORM. 3DSTATE_CLIP already has to listen to it in order to support ARB_clip_control settings. Setting the "Use the 3DSTATE_CLIP bitmask" force enable bit allows us to drop _NEW_TRANSFORM from all the shader stage atoms, so we can re-emit them less often. Improves performance of OglBatch7 (version 6) by 2.70773% +/- 0.491257% (n = 38) at 1024x768 on Cherryview. 
Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/gen6_clip_state.c | 9 +++++++++ src/mesa/drivers/dri/i965/gen8_ds_state.c | 8 ++------ src/mesa/drivers/dri/i965/gen8_gs_state.c | 9 +++------ src/mesa/drivers/dri/i965/gen8_vs_state.c | 8 ++------ 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index c4e0f27..84a51b4 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2217,6 +2217,7 @@ enum brw_message_target { # define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19) # define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19) # define GEN7_CLIP_EARLY_CULL (1 << 18) +# define GEN8_CLIP_FORCE_USER_CLIP_DISTANCE_BITMASK (1 << 17) # define GEN7_CLIP_CULLMODE_BOTH (0 << 16) # define GEN7_CLIP_CULLMODE_NONE (1 << 16) # define GEN7_CLIP_CULLMODE_FRONT (2 << 16) diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 17fef76..0b3c7f1 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -153,6 +153,15 @@ upload_clip_state(struct brw_context *brw) /* _NEW_TRANSFORM */ dw2 |= (ctx->Transform.ClipPlanesEnabled << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT); + + /* Have the hardware use the user clip distance clip test enable bitmask + * specified here in 3DSTATE_CLIP rather than the one in 3DSTATE_VS/DS/GS. + * We already listen to _NEW_TRANSFORM here, but the other atoms don't + * need to other than this. 
+ */ + if (brw->gen >= 8) + dw1 |= GEN8_CLIP_FORCE_USER_CLIP_DISTANCE_BITMASK; + if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) dw2 |= GEN6_CLIP_API_D3D; else diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c index 0ea1456..ee2f82e 100644 --- a/src/mesa/drivers/dri/i965/gen8_ds_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ds_state.c @@ -30,7 +30,6 @@ static void gen8_upload_ds_state(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; const struct brw_stage_state *stage_state = &brw->tes.base; /* BRW_NEW_TESS_PROGRAMS */ bool active = brw->tess_eval_program; @@ -72,10 +71,7 @@ gen8_upload_ds_state(struct brw_context *brw) GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) | (tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ? GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0)); - /* _NEW_TRANSFORM */ - OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled, - GEN8_DS_USER_CLIP_DISTANCE) | - SET_FIELD(vue_prog_data->cull_distance_mask, + OUT_BATCH(SET_FIELD(vue_prog_data->cull_distance_mask, GEN8_DS_USER_CULL_DISTANCE)); @@ -110,7 +106,7 @@ gen8_upload_ds_state(struct brw_context *brw) const struct brw_tracked_state gen8_ds_state = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = 0, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | BRW_NEW_TESS_PROGRAMS | diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index c39dc61..2b74f1b 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -30,7 +30,6 @@ static void gen8_upload_gs_state(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; const struct brw_stage_state *stage_state = &brw->gs.base; /* BRW_NEW_GEOMETRY_PROGRAM */ bool active = brw->geometry_program; @@ -112,10 +111,8 @@ gen8_upload_gs_state(struct brw_context *brw) /* DW8 */ OUT_BATCH(dw8); - /* DW9 / 
_NEW_TRANSFORM */ - OUT_BATCH((vue_prog_data->cull_distance_mask | - ctx->Transform.ClipPlanesEnabled << - GEN8_GS_USER_CLIP_DISTANCE_SHIFT) | + /* DW9 */ + OUT_BATCH(vue_prog_data->cull_distance_mask | (urb_entry_output_length << GEN8_GS_URB_OUTPUT_LENGTH_SHIFT) | (urb_entry_write_offset << GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT)); @@ -138,7 +135,7 @@ gen8_upload_gs_state(struct brw_context *brw) const struct brw_tracked_state gen8_gs_state = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = 0, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | BRW_NEW_CONTEXT | diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index aed6be0..7b66da4 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -33,7 +33,6 @@ static void upload_vs_state(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; const struct brw_stage_state *stage_state = &brw->vs.base; uint32_t floating_point_mode = 0; @@ -81,16 +80,13 @@ upload_vs_state(struct brw_context *brw) simd8_enable | GEN6_VS_ENABLE); - /* _NEW_TRANSFORM */ - OUT_BATCH(vue_prog_data->cull_distance_mask | - (ctx->Transform.ClipPlanesEnabled << - GEN8_VS_USER_CLIP_DISTANCE_SHIFT)); + OUT_BATCH(vue_prog_data->cull_distance_mask); ADVANCE_BATCH(); } const struct brw_tracked_state gen8_vs_state = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = 0, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | BRW_NEW_CONTEXT | -- 2.7.4