From 6ba9090ea05e817bd38c1fcc63c53168b16593c7 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 1 Nov 2011 10:54:08 -0700 Subject: [PATCH] i965: Use 0 for the number of binding table entries in 3DSTATE_(VS|WM). These fields control how many entries the hardware prefetches into the state cache, so they only impact performance, not correctness. However, it's not clear how to use this in a way that's beneficial. According to the documentation, kernels "using a large number" of entries may wish to program this to zero to avoid thrashing the cache; it's unclear how many is too many. Also, Ironlake's WM was missing this feature entirely---the count had to be zero. The dirty bit tracking to handle this complicates the surface state and binding table setup; removing it should simplify things and make future refactoring easier. So just set 0 for the number of entries rather than trying to compute and track it. Appears to have no impact on Nexuiz and OpenArena on Sandybridge. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_context.h | 4 ---- src/mesa/drivers/dri/i965/brw_state_upload.c | 2 -- src/mesa/drivers/dri/i965/brw_vs_state.c | 7 +------ src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 8 -------- src/mesa/drivers/dri/i965/brw_wm_state.c | 10 ++-------- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 5 ----- src/mesa/drivers/dri/i965/gen6_vs_state.c | 7 ++----- src/mesa/drivers/dri/i965/gen6_wm_state.c | 4 ---- src/mesa/drivers/dri/i965/gen7_vs_state.c | 5 +---- src/mesa/drivers/dri/i965/gen7_wm_state.c | 4 ---- 10 files changed, 6 insertions(+), 50 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 3e05e36..f8e6f81 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -169,8 +169,6 @@ enum brw_state_id { */ #define BRW_NEW_BATCH (1 << BRW_STATE_BATCH) /** \see brw.state.depth_region */ -#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES) -#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES) #define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF) @@ -739,7 +737,6 @@ struct brw_context /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; uint32_t surf_offset[BRW_VS_MAX_SURF]; - GLuint nr_surfaces; uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ @@ -810,7 +807,6 @@ struct brw_context uint32_t sdc_offset[BRW_MAX_TEX_UNIT]; GLuint render_surf; - GLuint nr_surfaces; drm_intel_bo *scratch_bo; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 2f16891..862c5a6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -357,8 +357,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), DEFINE_BIT(BRW_NEW_BATCH), - DEFINE_BIT(BRW_NEW_NR_WM_SURFACES), - DEFINE_BIT(BRW_NEW_NR_VS_SURFACES), DEFINE_BIT(BRW_NEW_VS_CONSTBUF), DEFINE_BIT(BRW_NEW_WM_CONSTBUF), DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE), diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 5a9032c..631e182 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -71,11 +71,7 @@ brw_upload_vs_unit(struct brw_context *brw) */ vs->thread1.single_program_flow = (intel->gen == 5); - /* BRW_NEW_NR_VS_SURFACES */ - if (intel->gen == 5) - vs->thread1.binding_table_entry_count = 0; /* hardware requirement */ - else - vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + vs->thread1.binding_table_entry_count = 0; if (brw->vs.prog_data->total_scratch != 0) { vs->thread2.scratch_space_base_pointer = @@ -176,7 +172,6 @@ const struct brw_tracked_state brw_vs_unit = { .brw = (BRW_NEW_BATCH | BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_VERTEX_PROGRAM), .cache = CACHE_NEW_VS_PROG diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 84d3101..4c99185 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -136,9 +136,6 @@ brw_update_vs_constant_surface( struct gl_context *ctx, /** * Vertex shader surfaces (constant buffer). - * - * This consumes the state updates for the constant buffer needing - * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit. */ static void brw_upload_vs_surfaces(struct brw_context *brw) @@ -170,11 +167,6 @@ brw_upload_vs_surfaces(struct brw_context *brw) brw->vs.bind_bo_offset = 0; } } - - if (brw->vs.nr_surfaces != nr_surfaces) { - brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; - brw->vs.nr_surfaces = nr_surfaces; - } } const struct brw_tracked_state brw_vs_surfaces = { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 51ef745..69d7a76 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -113,12 +113,7 @@ brw_upload_wm_unit(struct brw_context *brw) wm->thread1.depth_coef_urb_read_offset = 1; wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - if (intel->gen == 5) - wm->thread1.binding_table_entry_count = 0; /* hardware requirement */ - else { - /* BRW_NEW_NR_SURFACES */ - wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces; - } + wm->thread1.binding_table_entry_count = 0; if (brw->wm.prog_data->total_scratch != 0) { wm->thread2.scratch_space_base_pointer = @@ -263,8 +258,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_BATCH | BRW_NEW_PROGRAM_CACHE | BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_WM_SURFACES), + BRW_NEW_CURBE_OFFSETS), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d120313..df7a0ca 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -624,11 +624,6 @@ brw_upload_wm_surfaces(struct brw_context *brw) } } - if (brw->wm.nr_surfaces != nr_surfaces) { - brw->wm.nr_surfaces = nr_surfaces; - brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; - } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e22fd39..7ea7e21 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -167,9 +167,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); OUT_BATCH(brw->vs.prog_offset); - OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | - floating_point_mode | - (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(floating_point_mode | (0 << GEN6_VS_SAMPLER_COUNT_SHIFT)); if (brw->vs.prog_data->total_scratch) { OUT_RELOC(brw->vs.scratch_bo, @@ -220,8 +218,7 @@ upload_vs_state(struct brw_context *brw) const struct brw_tracked_state gen6_vs_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, - .brw = (BRW_NEW_NR_VS_SURFACES | - BRW_NEW_URB_FENCE | + .brw = (BRW_NEW_URB_FENCE | BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_BATCH), diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 714d594..b98516e 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -140,9 +140,6 @@ upload_wm_state(struct brw_context *brw) if (ctx->Shader.CurrentFragmentProgram == NULL) dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; - /* BRW_NEW_NR_WM_SURFACES */ - dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT; - /* CACHE_NEW_SAMPLER */ dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; dw4 |= (brw->wm.prog_data->first_curbe_grf << @@ -217,7 +214,6 @@ const struct brw_tracked_state gen6_wm_state = { _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_NR_WM_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), .cache = (CACHE_NEW_SAMPLER | diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index dbf9346..462db5b 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -75,9 +75,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); OUT_BATCH(brw->vs.prog_offset); - OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | - floating_point_mode | - (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | floating_point_mode); if (brw->vs.prog_data->total_scratch) { OUT_RELOC(brw->vs.scratch_bo, @@ -101,7 +99,6 @@ const struct brw_tracked_state gen7_vs_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM | diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 2dce359..8b79663 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -146,9 +146,6 @@ upload_ps_state(struct brw_context *brw) dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT; - /* BRW_NEW_NR_WM_SURFACES */ - dw2 |= brw->wm.nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; - /* Use ALT floating point mode for ARB fragment programs, because they * require 0^0 == 1. */ @@ -198,7 +195,6 @@ const struct brw_tracked_state gen7_ps_state = { .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_NR_WM_SURFACES | BRW_NEW_PS_BINDING_TABLE | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), -- 2.7.4