i965: Use 0 for the number of binding table entries in 3DSTATE_(VS|WM).
author Kenneth Graunke <kenneth@whitecape.org>
Tue, 1 Nov 2011 17:54:08 +0000 (10:54 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Fri, 11 Nov 2011 06:51:19 +0000 (22:51 -0800)
These fields control how many entries the hardware prefetches into the
state cache, so they only impact performance, not correctness.  However,
it's not clear how to use them in a way that's beneficial.

According to the documentation, kernels "using a large number" of
entries may wish to program this to zero to avoid thrashing the cache;
it's unclear how many is too many.  Also, Ironlake's WM was missing this
feature entirely---the count had to be zero.

The dirty bit tracking to handle this complicates the surface state
and binding table setup; removing it should simplify things and make
future refactoring easier.  So just set 0 for the number of entries
rather than trying to compute and track it.

Appears to have no impact on Nexuiz and OpenArena on Sandybridge.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/brw_vs_state.c
src/mesa/drivers/dri/i965/brw_vs_surface_state.c
src/mesa/drivers/dri/i965/brw_wm_state.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_vs_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c

index 3e05e36..f8e6f81 100644 (file)
@@ -169,8 +169,6 @@ enum brw_state_id {
  */
 #define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
 /** \see brw.state.depth_region */
-#define BRW_NEW_NR_WM_SURFACES         (1 << BRW_STATE_NR_WM_SURFACES)
-#define BRW_NEW_NR_VS_SURFACES         (1 << BRW_STATE_NR_VS_SURFACES)
 #define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
 #define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)
@@ -739,7 +737,6 @@ struct brw_context
       /** Binding table of pointers to surf_bo entries */
       uint32_t bind_bo_offset;
       uint32_t surf_offset[BRW_VS_MAX_SURF];
-      GLuint nr_surfaces;      
 
       uint32_t push_const_offset; /* Offset in the batchbuffer */
       int push_const_size; /* in 256-bit register increments */
@@ -810,7 +807,6 @@ struct brw_context
       uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
 
       GLuint render_surf;
-      GLuint nr_surfaces;      
 
       drm_intel_bo *scratch_bo;
 
index 2f16891..862c5a6 100644 (file)
@@ -357,8 +357,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VERTICES),
    DEFINE_BIT(BRW_NEW_BATCH),
-   DEFINE_BIT(BRW_NEW_NR_WM_SURFACES),
-   DEFINE_BIT(BRW_NEW_NR_VS_SURFACES),
    DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
    DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
    DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
index 5a9032c..631e182 100644 (file)
@@ -71,11 +71,7 @@ brw_upload_vs_unit(struct brw_context *brw)
    */
    vs->thread1.single_program_flow = (intel->gen == 5);
 
-   /* BRW_NEW_NR_VS_SURFACES */
-   if (intel->gen == 5)
-      vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
-   else
-      vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+   vs->thread1.binding_table_entry_count = 0;
 
    if (brw->vs.prog_data->total_scratch != 0) {
       vs->thread2.scratch_space_base_pointer =
@@ -176,7 +172,6 @@ const struct brw_tracked_state brw_vs_unit = {
       .brw   = (BRW_NEW_BATCH |
                BRW_NEW_PROGRAM_CACHE |
                BRW_NEW_CURBE_OFFSETS |
-                BRW_NEW_NR_VS_SURFACES |
                BRW_NEW_URB_FENCE |
                 BRW_NEW_VERTEX_PROGRAM),
       .cache = CACHE_NEW_VS_PROG
index 84d3101..4c99185 100644 (file)
@@ -136,9 +136,6 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
 
 /**
  * Vertex shader surfaces (constant buffer).
- *
- * This consumes the state updates for the constant buffer needing
- * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit.
  */
 static void
 brw_upload_vs_surfaces(struct brw_context *brw)
@@ -170,11 +167,6 @@ brw_upload_vs_surfaces(struct brw_context *brw)
         brw->vs.bind_bo_offset = 0;
       }
    }
-
-   if (brw->vs.nr_surfaces != nr_surfaces) {
-      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
-      brw->vs.nr_surfaces = nr_surfaces;
-   }
 }
 
 const struct brw_tracked_state brw_vs_surfaces = {
index 51ef745..69d7a76 100644 (file)
@@ -113,12 +113,7 @@ brw_upload_wm_unit(struct brw_context *brw)
    wm->thread1.depth_coef_urb_read_offset = 1;
    wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
-   if (intel->gen == 5)
-      wm->thread1.binding_table_entry_count = 0; /* hardware requirement */
-   else {
-      /* BRW_NEW_NR_SURFACES */
-      wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces;
-   }
+   wm->thread1.binding_table_entry_count = 0;
 
    if (brw->wm.prog_data->total_scratch != 0) {
       wm->thread2.scratch_space_base_pointer =
@@ -263,8 +258,7 @@ const struct brw_tracked_state brw_wm_unit = {
       .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE |
              BRW_NEW_FRAGMENT_PROGRAM |
-             BRW_NEW_CURBE_OFFSETS |
-             BRW_NEW_NR_WM_SURFACES),
+             BRW_NEW_CURBE_OFFSETS),
 
       .cache = (CACHE_NEW_WM_PROG |
                CACHE_NEW_SAMPLER)
index d120313..df7a0ca 100644 (file)
@@ -624,11 +624,6 @@ brw_upload_wm_surfaces(struct brw_context *brw)
       }
    }
 
-   if (brw->wm.nr_surfaces != nr_surfaces) {
-      brw->wm.nr_surfaces = nr_surfaces;
-      brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
-   }
-
    brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
 }
 
index e22fd39..7ea7e21 100644 (file)
@@ -167,9 +167,7 @@ upload_vs_state(struct brw_context *brw)
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
    OUT_BATCH(brw->vs.prog_offset);
-   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
-             floating_point_mode |
-            (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH(floating_point_mode | (0 << GEN6_VS_SAMPLER_COUNT_SHIFT));
 
    if (brw->vs.prog_data->total_scratch) {
       OUT_RELOC(brw->vs.scratch_bo,
@@ -220,8 +218,7 @@ upload_vs_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_vs_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
-      .brw   = (BRW_NEW_NR_VS_SURFACES |
-               BRW_NEW_URB_FENCE |
+      .brw   = (BRW_NEW_URB_FENCE |
                BRW_NEW_CONTEXT |
                BRW_NEW_VERTEX_PROGRAM |
                BRW_NEW_BATCH),
index 714d594..b98516e 100644 (file)
@@ -140,9 +140,6 @@ upload_wm_state(struct brw_context *brw)
    if (ctx->Shader.CurrentFragmentProgram == NULL)
       dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
 
-   /* BRW_NEW_NR_WM_SURFACES */
-   dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
-
    /* CACHE_NEW_SAMPLER */
    dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
    dw4 |= (brw->wm.prog_data->first_curbe_grf <<
@@ -217,7 +214,6 @@ const struct brw_tracked_state gen6_wm_state = {
                _NEW_PROGRAM_CONSTANTS |
                _NEW_POLYGON),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-                BRW_NEW_NR_WM_SURFACES |
                BRW_NEW_URB_FENCE |
                BRW_NEW_BATCH),
       .cache = (CACHE_NEW_SAMPLER |
index dbf9346..462db5b 100644 (file)
@@ -75,9 +75,7 @@ upload_vs_state(struct brw_context *brw)
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
    OUT_BATCH(brw->vs.prog_offset);
-   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
-             floating_point_mode |
-            (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | floating_point_mode);
 
    if (brw->vs.prog_data->total_scratch) {
       OUT_RELOC(brw->vs.scratch_bo,
@@ -101,7 +99,6 @@ const struct brw_tracked_state gen7_vs_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_CURBE_OFFSETS |
-                BRW_NEW_NR_VS_SURFACES |
                BRW_NEW_URB_FENCE |
                BRW_NEW_CONTEXT |
                BRW_NEW_VERTEX_PROGRAM |
index 2dce359..8b79663 100644 (file)
@@ -146,9 +146,6 @@ upload_ps_state(struct brw_context *brw)
 
    dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
 
-   /* BRW_NEW_NR_WM_SURFACES */
-   dw2 |= brw->wm.nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
-
    /* Use ALT floating point mode for ARB fragment programs, because they
     * require 0^0 == 1.
     */
@@ -198,7 +195,6 @@ const struct brw_tracked_state gen7_ps_state = {
       .mesa  = _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_FRAGMENT_PROGRAM |
-                BRW_NEW_NR_WM_SURFACES |
                BRW_NEW_PS_BINDING_TABLE |
                BRW_NEW_URB_FENCE |
                BRW_NEW_BATCH),