i965: Disable thread dispatch when the FS doesn't do any work.
authorEric Anholt <eric@anholt.net>
Mon, 18 Oct 2010 22:43:20 +0000 (15:43 -0700)
committerEric Anholt <eric@anholt.net>
Tue, 19 Oct 2010 17:49:20 +0000 (10:49 -0700)
This should reduce the cost of generating shadow maps, for example.
No performance difference measured in nexuiz, though it does trigger
this path.

src/mesa/drivers/dri/i965/brw_wm.h
src/mesa/drivers/dri/i965/brw_wm_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c

index a094c45..99bd15c 100644 (file)
@@ -33,6 +33,7 @@
 #ifndef BRW_WM_H
 #define BRW_WM_H
 
+#include <stdbool.h>
 
 #include "program/prog_instruction.h"
 #include "brw_context.h"
@@ -473,4 +474,6 @@ GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog
 struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
 struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name);
 
+bool brw_color_buffer_write_enabled(struct brw_context *brw);
+
 #endif
index 817adef..411204e 100644 (file)
@@ -52,9 +52,36 @@ struct brw_wm_unit_key {
    unsigned int nr_surfaces, sampler_count;
    GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
    GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+   GLboolean color_write_enable;
    GLfloat offset_units, offset_factor;
 };
 
+bool
+brw_color_buffer_write_enabled(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   const struct gl_fragment_program *fp = brw->fragment_program;
+   int i;
+
+   /* _NEW_BUFFERS */
+   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+
+      /* _NEW_COLOR */
+      if (rb &&
+         (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
+          fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
+         (ctx->Color.ColorMask[i][0] ||
+          ctx->Color.ColorMask[i][1] ||
+          ctx->Color.ColorMask[i][2] ||
+          ctx->Color.ColorMask[i][3])) {
+        return true;
+      }
+   }
+
+   return false;
+}
+
 static void
 wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 {
@@ -100,6 +127,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    if (brw->state.depth_region == NULL)
       key->computes_depth = 0;
 
+   /* _NEW_BUFFERS | _NEW_COLOR */
+   key->color_write_enable = brw_color_buffer_write_enabled(brw);
+
    /* _NEW_COLOR */
    key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
    key->is_glsl = bfp->isGLSL;
@@ -188,7 +218,13 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
       wm.wm5.enable_16_pix = 1;
 
    wm.wm5.max_threads = brw->wm_max_threads - 1;
-   wm.wm5.thread_dispatch_enable = 1;  /* AKA: color_write */
+
+   if (key->color_write_enable ||
+       key->uses_kill ||
+       key->computes_depth) {
+      wm.wm5.thread_dispatch_enable = 1;
+   }
+
    wm.wm5.legacy_line_rast = 0;
    wm.wm5.legacy_global_depth_bias = 0;
    wm.wm5.early_depth_test = 1;                /* never need to disable */
@@ -293,7 +329,8 @@ const struct brw_tracked_state brw_wm_unit = {
               _NEW_POLYGONSTIPPLE | 
               _NEW_LINE | 
               _NEW_COLOR |
-              _NEW_DEPTH),
+              _NEW_DEPTH |
+              _NEW_BUFFERS),
 
       .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
              BRW_NEW_CURBE_OFFSETS |
index 7ef99ee..21059be 100644 (file)
@@ -29,6 +29,7 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_util.h"
+#include "brw_wm.h"
 #include "program/prog_parameter.h"
 #include "program/prog_statevars.h"
 #include "intel_batchbuffer.h"
@@ -123,7 +124,6 @@ upload_wm_state(struct brw_context *brw)
           GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 
    dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
-   dw5 |= GEN6_WM_DISPATCH_ENABLE;
 
    /* BRW_NEW_FRAGMENT_PROGRAM */
    if (fp->isGLSL)
@@ -149,6 +149,11 @@ upload_wm_state(struct brw_context *brw)
    if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
       dw5 |= GEN6_WM_KILL_ENABLE;
 
+   if (brw_color_buffer_write_enabled(brw) ||
+       dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) {
+      dw5 |= GEN6_WM_DISPATCH_ENABLE;
+   }
+
    dw6 |= GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
 
    dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) <<
@@ -169,7 +174,7 @@ upload_wm_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen6_wm_state = {
    .dirty = {
-      .mesa  = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR |
+      .mesa  = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS |
                _NEW_PROGRAM_CONSTANTS),
       .brw   = (BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_FRAGMENT_PROGRAM |