i965: Remove unneeded VS workaround stalls on Baytrail.
author Greg Hunt <greg.hunt@mobica.com>
Wed, 25 Jun 2014 13:42:24 +0000 (14:42 +0100)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 26 Jun 2014 18:31:28 +0000 (11:31 -0700)
According to the workarounds list, these stalls aren't needed on
production Baytrail systems.  Piglit confirms that as well.

These stalls cause a small slowdown when we are sending a large number
of small batches to the GPU.  Removing them improves performance by up
to 5% on some CPU-bound SynMark tests (Batch[4-7], DrvState1, HdrBloom,
Multithread, ShMapPcf).

Signed-off-by: Gregory Hunt <greg.hunt@mobica.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen7_sampler_state.c
src/mesa/drivers/dri/i965/gen7_urb.c
src/mesa/drivers/dri/i965/gen7_vs_state.c

index 9764645..a46cc48 100644 (file)
@@ -100,7 +100,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
                                    stage_state, AUB_TRACE_VS_CONSTANTS);
 
    if (brw->gen >= 7) {
-      if (brw->gen == 7 && !brw->is_haswell)
+      if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail)
          gen7_emit_vs_workaround_flush(brw);
 
       gen7_upload_constant_state(brw, stage_state, true /* active */,
index 6077ff2..219a174 100644 (file)
@@ -212,7 +212,7 @@ gen7_upload_sampler_state_table(struct brw_context *brw,
       }
    }
 
-  if (brw->gen == 7 && !brw->is_haswell &&
+  if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail &&
       stage_state->stage == MESA_SHADER_VERTEX) {
       gen7_emit_vs_workaround_flush(brw);
   }
index 2653e9c..190d6f0 100644 (file)
@@ -121,9 +121,9 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
     *     A PIPE_CONTOL command with the CS Stall bit set must be programmed
     *     in the ring after this instruction.
     *
-    * No such restriction exists for Haswell.
+    * No such restriction exists for Haswell or Baytrail.
     */
-   if (brw->gen < 8 && !brw->is_haswell)
+   if (brw->gen < 8 && !brw->is_haswell && !brw->is_baytrail)
       gen7_emit_cs_stall_flush(brw);
 }
 
@@ -263,7 +263,7 @@ gen7_upload_urb(struct brw_context *brw)
    brw->urb.vs_start = push_constant_chunks;
    brw->urb.gs_start = push_constant_chunks + vs_chunks;
 
-   if (brw->gen == 7 && !brw->is_haswell)
+   if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail)
       gen7_emit_vs_workaround_flush(brw);
    gen7_emit_urb_state(brw,
                        brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start,
index 4d99150..01be756 100644 (file)
@@ -72,7 +72,7 @@ upload_vs_state(struct brw_context *brw)
    const int max_threads_shift = brw->is_haswell ?
       HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
 
-   if (!brw->is_haswell)
+   if (!brw->is_haswell && !brw->is_baytrail)
       gen7_emit_vs_workaround_flush(brw);
 
    /* Use ALT floating point mode for ARB vertex programs, because they