i965/gen4: Move the GS state to state streaming.
author Eric Anholt <eric@anholt.net>
Mon, 25 Apr 2011 03:36:22 +0000 (20:36 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 29 Apr 2011 22:26:56 +0000 (15:26 -0700)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_gs_state.c
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_state_dump.c
src/mesa/drivers/dri/i965/brw_vtbl.c

index 4b97bfb..9747543 100644 (file)
@@ -647,8 +647,8 @@ struct brw_context
       struct brw_gs_prog_data *prog_data;
 
       GLboolean prog_active;
+      uint32_t state_offset;
       drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
    } gs;
 
    struct {
index 69a5f7a..542874b 100644 (file)
 #include "brw_state.h"
 #include "brw_defines.h"
 
-struct brw_gs_unit_key {
-   unsigned int total_grf;
-   unsigned int urb_entry_read_length;
-
-   unsigned int curbe_offset;
-
-   unsigned int nr_urb_entries, urb_size;
-   GLboolean prog_active;
-};
-
 static void
-gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
-{
-   memset(key, 0, sizeof(*key));
-
-   /* CACHE_NEW_GS_PROG */
-   key->prog_active = brw->gs.prog_active;
-   if (key->prog_active) {
-      key->total_grf = brw->gs.prog_data->total_grf;
-      key->urb_entry_read_length = brw->gs.prog_data->urb_read_length;
-   } else {
-      key->total_grf = 1;
-      key->urb_entry_read_length = 1;
-   }
-
-   /* BRW_NEW_CURBE_OFFSETS */
-   key->curbe_offset = brw->curbe.clip_start;
-
-   /* BRW_NEW_URB_FENCE */
-   key->nr_urb_entries = brw->urb.nr_gs_entries;
-   key->urb_size = brw->urb.vsize;
-}
-
-static drm_intel_bo *
-gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+brw_prepare_gs_unit(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
-   struct brw_gs_unit_state gs;
-   drm_intel_bo *bo;
-
-   memset(&gs, 0, sizeof(gs));
-
-   gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
-   if (key->prog_active) /* reloc */
-      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+   struct brw_gs_unit_state *gs;
 
-   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   gs.thread1.single_program_flow = 1;
+   gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset);
 
-   gs.thread3.dispatch_grf_start_reg = 1;
-   gs.thread3.const_urb_entry_read_offset = 0;
-   gs.thread3.const_urb_entry_read_length = 0;
-   gs.thread3.urb_entry_read_offset = 0;
-   gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   memset(gs, 0, sizeof(*gs));
 
-   gs.thread4.nr_urb_entries = key->nr_urb_entries;
-   gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
-
-   if (key->nr_urb_entries >= 8)
-      gs.thread4.max_threads = 1;
-   else
-      gs.thread4.max_threads = 0;
-
-   if (intel->gen == 5)
-      gs.thread4.rendering_enable = 1;
-
-   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
-      gs.thread4.stats_enable = 1;
-
-   bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
-                        key, sizeof(*key),
-                        &brw->gs.prog_bo, 1,
-                        &gs, sizeof(gs));
+   /* CACHE_NEW_GS_PROG */
+   if (brw->gs.prog_active) {
+      gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) /
+                                  16 - 1);
+      /* reloc */
+      gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+
+      gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+      gs->thread1.single_program_flow = 1;
+
+      gs->thread3.dispatch_grf_start_reg = 1;
+      gs->thread3.const_urb_entry_read_offset = 0;
+      gs->thread3.const_urb_entry_read_length = 0;
+      gs->thread3.urb_entry_read_offset = 0;
+      gs->thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+
+      /* BRW_NEW_URB_FENCE */
+      gs->thread4.nr_urb_entries = brw->urb.nr_gs_entries;
+      gs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+
+      if (brw->urb.nr_gs_entries >= 8)
+        gs->thread4.max_threads = 1;
+      else
+        gs->thread4.max_threads = 0;
 
-   if (key->prog_active) {
       /* Emit GS program relocation */
-      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_gs_unit_state, thread0),
-                             brw->gs.prog_bo, gs.thread0.grf_reg_count << 1,
+      drm_intel_bo_emit_reloc(intel->batch.bo,
+                             (brw->gs.state_offset +
+                              offsetof(struct brw_gs_unit_state, thread0)),
+                             brw->gs.prog_bo, gs->thread0.grf_reg_count << 1,
                              I915_GEM_DOMAIN_INSTRUCTION, 0);
    }
 
-   return bo;
-}
-
-static void prepare_gs_unit(struct brw_context *brw)
-{
-   struct brw_gs_unit_key key;
+   if (intel->gen == 5)
+      gs->thread4.rendering_enable = 1;
 
-   gs_unit_populate_key(brw, &key);
+   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
+      gs->thread4.stats_enable = 1;
 
-   drm_intel_bo_unreference(brw->gs.state_bo);
-   brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
-                                      &key, sizeof(key),
-                                      &brw->gs.prog_bo, 1,
-                                      NULL);
-   if (brw->gs.state_bo == NULL) {
-      brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
-   }
+   brw->state.dirty.cache |= CACHE_NEW_GS_UNIT;
 }
 
 const struct brw_tracked_state brw_gs_unit = {
    .dirty = {
       .mesa  = 0,
-      .brw   = (BRW_NEW_CURBE_OFFSETS |
+      .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_GS_PROG
    },
-   .prepare = prepare_gs_unit,
+   .prepare = brw_prepare_gs_unit,
 };
index 3552cce..7119786 100644 (file)
@@ -146,7 +146,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
    OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
    if (brw->gs.prog_active)
-      OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+               brw->gs.state_offset | 1);
    else
       OUT_BATCH(0);
    OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -162,12 +163,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
    brw->state.dirty.brw |= BRW_NEW_PSP;
 }
 
-
-static void prepare_psp_urb_cbs(struct brw_context *brw)
-{
-   brw_add_validated_bo(brw, brw->gs.state_bo);
-}
-
 static void upload_psp_urb_cbs(struct brw_context *brw )
 {
    upload_pipelined_state_pointers(brw);
@@ -187,7 +182,6 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
                CACHE_NEW_WM_UNIT | 
                CACHE_NEW_CC_UNIT)
    },
-   .prepare = prepare_psp_urb_cbs,
    .emit = upload_psp_urb_cbs,
 };
 
index ce3ca39..3a3aa8c 100644 (file)
@@ -393,7 +393,8 @@ void brw_debug_batch(struct intel_context *intel)
    brw_debug_prog("VS prog", brw->vs.prog_bo);
 
    if (intel->gen < 6)
-       state_struct_out("GS", brw->gs.state_bo, 0, sizeof(struct brw_gs_unit_state));
+       state_struct_out("GS", intel->batch.bo, brw->gs.state_offset,
+                       sizeof(struct brw_gs_unit_state));
    brw_debug_prog("GS prog", brw->gs.prog_bo);
 
    if (intel->gen < 6) {
index 49d7711..f2c417d 100644 (file)
@@ -78,7 +78,6 @@ static void brw_destroy_context( struct intel_context *intel )
    dri_bo_release(&brw->vs.prog_bo);
    dri_bo_release(&brw->vs.const_bo);
    dri_bo_release(&brw->gs.prog_bo);
-   dri_bo_release(&brw->gs.state_bo);
    dri_bo_release(&brw->clip.prog_bo);
    dri_bo_release(&brw->sf.prog_bo);
    dri_bo_release(&brw->wm.prog_bo);