From f9a2679db5886a65eac7e08a8f75674cf3dff8b7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Apr 2014 11:27:22 -0700 Subject: [PATCH] i965/gen7+: Move sampler state packets to the stage sampler state table update. Now that we have the stage state coming into our setup of sampler states, it's easy to drop an identifier into it of which stage the stage_state is, and then look up which packet to emit in a little table. No performance difference on cairo on glamor (n=492). v2: Don't forget to do the workaround flush on IVB. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 3 +++ src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/gen7_gs_state.c | 8 +------- src/mesa/drivers/dri/i965/gen7_sampler_state.c | 15 ++++++++++++++- src/mesa/drivers/dri/i965/gen7_vs_state.c | 8 +------- src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 +-------- src/mesa/drivers/dri/i965/gen8_gs_state.c | 8 +------- src/mesa/drivers/dri/i965/gen8_ps_state.c | 8 +------- src/mesa/drivers/dri/i965/gen8_vs_state.c | 8 +------- 9 files changed, 24 insertions(+), 44 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 449fcfc..17ae685 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -628,6 +628,9 @@ brwCreateContext(gl_api api, brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil; brw->has_swizzling = screen->hw_has_swizzling; + brw->vs.base.stage = MESA_SHADER_VERTEX; + brw->gs.base.stage = MESA_SHADER_GEOMETRY; + brw->wm.base.stage = MESA_SHADER_FRAGMENT; if (brw->gen >= 8) { gen8_init_vtable_surface_functions(brw); gen7_init_vtable_sampler_functions(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 379af38..92e1592 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -925,6 +925,7 @@ struct brw_transform_feedback_object { */ struct brw_stage_state { + gl_shader_stage stage; struct brw_stage_prog_data *prog_data; /** diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index d18ae15..06e6cf7 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -66,12 +66,6 @@ upload_gs_state(struct brw_context *brw) /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); /** @@ -198,7 +192,7 @@ const struct brw_tracked_state gen7_gs_state = { BRW_NEW_GS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_GS_PROG }, .emit = upload_gs_state, }; diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 8eb337d..74d5e9e 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -187,6 +187,11 @@ gen7_upload_sampler_state_table(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct gen7_sampler_state *samplers; uint32_t sampler_count = stage_state->sampler_count; + static const uint16_t packet_headers[] = { + [MESA_SHADER_VERTEX] = _3DSTATE_SAMPLER_STATE_POINTERS_VS, + [MESA_SHADER_GEOMETRY] = _3DSTATE_SAMPLER_STATE_POINTERS_GS, + [MESA_SHADER_FRAGMENT] = _3DSTATE_SAMPLER_STATE_POINTERS_PS, + }; GLbitfield SamplersUsed = prog->SamplersUsed; @@ -207,7 +212,15 @@ gen7_upload_sampler_state_table(struct brw_context *brw, } } - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + if (brw->gen == 7 && !brw->is_haswell && + stage_state->stage == MESA_SHADER_VERTEX) { + gen7_emit_vs_workaround_flush(brw); + } + + BEGIN_BATCH(2); + OUT_BATCH(packet_headers[stage_state->stage] << 16 | (2 - 2)); + OUT_BATCH(stage_state->sampler_offset); + ADVANCE_BATCH(); } void diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index b5fc871..6b1f680 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -75,12 +75,6 @@ upload_vs_state(struct brw_context *brw) if (!brw->is_haswell) gen7_emit_vs_workaround_flush(brw); - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen7_upload_constant_state(brw, stage_state, true /* active */, _3DSTATE_CONSTANT_VS); @@ -126,7 +120,7 @@ const struct brw_tracked_state gen7_vs_state = { BRW_NEW_VS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state, }; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index eabadee..2b95ef1 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -143,12 +143,6 @@ upload_ps_state(struct brw_context *brw) const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(brw->wm.base.sampler_offset); - ADVANCE_BATCH(); - /* CACHE_NEW_WM_PROG */ gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); @@ -281,8 +275,7 @@ const struct brw_tracked_state gen7_ps_state = { BRW_NEW_PS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = (CACHE_NEW_SAMPLER | - CACHE_NEW_WM_PROG) + .cache = (CACHE_NEW_WM_PROG) }, .emit = upload_ps_state, }; diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 97fbf84..6baada3 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -36,12 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw) /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen8_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); if (active) { @@ -135,7 +129,7 @@ const struct brw_tracked_state gen8_gs_state = { BRW_NEW_GS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_GS_PROG }, .emit = gen8_upload_gs_state, }; diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 0856060..aa7183b 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -136,12 +136,6 @@ upload_ps_state(struct brw_context *brw) struct gl_context *ctx = &brw->ctx; uint32_t dw3 = 0, dw6 = 0, dw7 = 0; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(brw->wm.base.sampler_offset); - ADVANCE_BATCH(); - /* CACHE_NEW_WM_PROG */ gen8_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); @@ -254,7 +248,7 @@ const struct brw_tracked_state gen8_ps_state = { BRW_NEW_PS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION, - .cache = CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG + .cache = CACHE_NEW_WM_PROG }, .emit = upload_ps_state, }; diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index 373cfe4..e7634ee 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -62,12 +62,6 @@ upload_vs_state(struct brw_context *brw) /* CACHE_NEW_VS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen8_upload_constant_state(brw, stage_state, true /* active */, _3DSTATE_CONSTANT_VS); @@ -119,7 +113,7 @@ const struct brw_tracked_state gen8_vs_state = { BRW_NEW_VS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION, - .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state, }; -- 2.7.4