i965: Use a single index per shader for shader_time.
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 19 Jun 2015 21:46:03 +0000 (14:46 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 23 Jun 2015 22:33:16 +0000 (15:33 -0700)
Previously, each shader took 3 shader time indices which were potentially
at arbitrary points in the shader time buffer.  Now, each shader gets a
single index which refers to 3 consecutive locations in the buffer.  This
simplifies some of the logic at the cost of having a magic 3 in a few places.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_program.c
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp

index d8fcfff..a7d83f8 100644 (file)
@@ -821,20 +821,10 @@ struct brw_tracked_state {
 enum shader_time_shader_type {
    ST_NONE,
    ST_VS,
-   ST_VS_WRITTEN,
-   ST_VS_RESET,
    ST_GS,
-   ST_GS_WRITTEN,
-   ST_GS_RESET,
    ST_FS8,
-   ST_FS8_WRITTEN,
-   ST_FS8_RESET,
    ST_FS16,
-   ST_FS16_WRITTEN,
-   ST_FS16_RESET,
    ST_CS,
-   ST_CS_WRITTEN,
-   ST_CS_RESET,
 };
 
 struct brw_vertex_buffer {
@@ -979,6 +969,8 @@ enum brw_predicate_state {
    BRW_PREDICATE_STATE_USE_BIT
 };
 
+struct shader_times;
+
 /**
  * brw_context is derived from gl_context.
  */
@@ -1503,7 +1495,7 @@ struct brw_context
       const char **names;
       int *ids;
       enum shader_time_shader_type *types;
-      uint64_t *cumulative;
+      struct shader_times *cumulative;
       int num_entries;
       int max_entries;
       double report_time;
index 0c11a9e..2839b9f 100644 (file)
@@ -578,38 +578,30 @@ fs_visitor::emit_shader_time_begin()
 void
 fs_visitor::emit_shader_time_end()
 {
-   enum shader_time_shader_type type, written_type, reset_type;
+   enum shader_time_shader_type type;
    switch (stage) {
    case MESA_SHADER_VERTEX:
       type = ST_VS;
-      written_type = ST_VS_WRITTEN;
-      reset_type = ST_VS_RESET;
       break;
    case MESA_SHADER_GEOMETRY:
       type = ST_GS;
-      written_type = ST_GS_WRITTEN;
-      reset_type = ST_GS_RESET;
       break;
    case MESA_SHADER_FRAGMENT:
       if (dispatch_width == 8) {
          type = ST_FS8;
-         written_type = ST_FS8_WRITTEN;
-         reset_type = ST_FS8_RESET;
       } else {
          assert(dispatch_width == 16);
          type = ST_FS16;
-         written_type = ST_FS16_WRITTEN;
-         reset_type = ST_FS16_RESET;
       }
       break;
    case MESA_SHADER_COMPUTE:
       type = ST_CS;
-      written_type = ST_CS_WRITTEN;
-      reset_type = ST_CS_RESET;
       break;
    default:
       unreachable("fs_visitor::emit_shader_time_end missing code");
    }
+   int shader_time_index = brw_get_shader_time_index(brw, shader_prog, prog,
+                                                     type);
 
    /* Insert our code just before the final SEND with EOT. */
    exec_node *end = this->instructions.get_tail();
@@ -639,20 +631,20 @@ fs_visitor::emit_shader_time_end()
     * trying to determine the time taken for single instructions.
     */
    ibld.ADD(diff, diff, fs_reg(-2u));
-   SHADER_TIME_ADD(ibld, type, diff);
-   SHADER_TIME_ADD(ibld, written_type, fs_reg(1u));
+   SHADER_TIME_ADD(ibld, shader_time_index, 0, diff);
+   SHADER_TIME_ADD(ibld, shader_time_index, 1, fs_reg(1u));
    ibld.emit(BRW_OPCODE_ELSE);
-   SHADER_TIME_ADD(ibld, reset_type, fs_reg(1u));
+   SHADER_TIME_ADD(ibld, shader_time_index, 2, fs_reg(1u));
    ibld.emit(BRW_OPCODE_ENDIF);
 }
 
 void
 fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
-                            enum shader_time_shader_type type, fs_reg value)
+                            int shader_time_index, int shader_time_subindex,
+                            fs_reg value)
 {
-   int shader_time_index =
-      brw_get_shader_time_index(brw, shader_prog, prog, type);
-   fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
+   int index = shader_time_index * 3 + shader_time_subindex;
+   fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
 
    fs_reg payload;
    if (dispatch_width == 8)
index cffedc0..55a9722 100644 (file)
@@ -278,7 +278,8 @@ public:
    void emit_shader_time_begin();
    void emit_shader_time_end();
    void SHADER_TIME_ADD(const brw::fs_builder &bld,
-                        enum shader_time_shader_type type, fs_reg value);
+                        int shader_time_index, int shader_time_subindex,
+                        fs_reg value);
 
    void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                             fs_reg dst, fs_reg offset, fs_reg src0,
index 414eab9..2327af7 100644 (file)
@@ -287,18 +287,24 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
    functions->MemoryBarrier = brw_memory_barrier;
 }
 
+struct shader_times {
+   uint64_t time;
+   uint64_t written;
+   uint64_t reset;
+};
+
 void
 brw_init_shader_time(struct brw_context *brw)
 {
-   const int max_entries = 4096;
-   brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
-                                            max_entries * SHADER_TIME_STRIDE,
-                                            4096);
+   const int max_entries = 2048;
+   brw->shader_time.bo =
+      drm_intel_bo_alloc(brw->bufmgr, "shader time",
+                         max_entries * SHADER_TIME_STRIDE * 3, 4096);
    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                           max_entries);
-   brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
+   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                                max_entries);
    brw->shader_time.max_entries = max_entries;
 }
@@ -319,27 +325,6 @@ compare_time(const void *a, const void *b)
 }
 
 static void
-get_written_and_reset(struct brw_context *brw, int i,
-                      uint64_t *written, uint64_t *reset)
-{
-   enum shader_time_shader_type type = brw->shader_time.types[i];
-   assert(type == ST_VS || type == ST_GS || type == ST_FS8 ||
-          type == ST_FS16 || type == ST_CS);
-
-   /* Find where we recorded written and reset. */
-   int wi, ri;
-
-   for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
-      ;
-
-   for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
-      ;
-
-   *written = brw->shader_time.cumulative[wi];
-   *reset = brw->shader_time.cumulative[ri];
-}
-
-static void
 print_shader_time_line(const char *stage, const char *name,
                        int shader_num, uint64_t time, uint64_t total)
 {
@@ -374,26 +359,13 @@ brw_report_shader_time(struct brw_context *brw)
       sorted[i] = &scaled[i];
 
       switch (type) {
-      case ST_VS_WRITTEN:
-      case ST_VS_RESET:
-      case ST_GS_WRITTEN:
-      case ST_GS_RESET:
-      case ST_FS8_WRITTEN:
-      case ST_FS8_RESET:
-      case ST_FS16_WRITTEN:
-      case ST_FS16_RESET:
-      case ST_CS_WRITTEN:
-      case ST_CS_RESET:
-         /* We'll handle these when along with the time. */
-         scaled[i] = 0;
-         continue;
-
       case ST_VS:
       case ST_GS:
       case ST_FS8:
       case ST_FS16:
       case ST_CS:
-         get_written_and_reset(brw, i, &written, &reset);
+         written = brw->shader_time.cumulative[i].written;
+         reset = brw->shader_time.cumulative[i].reset;
          break;
 
       default:
@@ -405,7 +377,7 @@ brw_report_shader_time(struct brw_context *brw)
          break;
       }
 
-      uint64_t time = brw->shader_time.cumulative[i];
+      uint64_t time = brw->shader_time.cumulative[i].time;
       if (written) {
          scaled[i] = time / written * (written + reset);
       } else {
@@ -491,16 +463,19 @@ brw_collect_shader_time(struct brw_context *brw)
     * overhead compared to the cost of tracking the time in the first place.
     */
    drm_intel_bo_map(brw->shader_time.bo, true);
-
-   uint32_t *times = brw->shader_time.bo->virtual;
+   void *bo_map = brw->shader_time.bo->virtual;
 
    for (int i = 0; i < brw->shader_time.num_entries; i++) {
-      brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
+      uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
+
+      brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
+      brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
+      brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
    }
 
    /* Zero the BO out to clear it out for our next collection.
     */
-   memset(times, 0, brw->shader_time.bo->size);
+   memset(bo_map, 0, brw->shader_time.bo->size);
    drm_intel_bo_unmap(brw->shader_time.bo);
 }
 
index 82f055f..234ee18 100644 (file)
@@ -1676,20 +1676,21 @@ vec4_visitor::emit_shader_time_end()
     */
    emit(ADD(diff, src_reg(diff), src_reg(-2u)));
 
-   emit_shader_time_write(st_base, src_reg(diff));
-   emit_shader_time_write(st_written, src_reg(1u));
+   int shader_time_index =
+      brw_get_shader_time_index(brw, shader_prog, prog, st_type);
+
+   emit_shader_time_write(shader_time_index, 0, src_reg(diff));
+   emit_shader_time_write(shader_time_index, 1, src_reg(1u));
    emit(BRW_OPCODE_ELSE);
-   emit_shader_time_write(st_reset, src_reg(1u));
+   emit_shader_time_write(shader_time_index, 2, src_reg(1u));
    emit(BRW_OPCODE_ENDIF);
 }
 
 void
-vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
+vec4_visitor::emit_shader_time_write(int shader_time_index,
+                                     int shader_time_subindex,
                                      src_reg value)
 {
-   int shader_time_index =
-      brw_get_shader_time_index(brw, shader_prog, prog, type);
-
    dst_reg dst =
       dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
 
@@ -1698,7 +1699,8 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
    time.reg_offset++;
 
    offset.type = BRW_REGISTER_TYPE_UD;
-   emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
+   int index = shader_time_index * 3 + shader_time_subindex;
+   emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
 
    time.type = BRW_REGISTER_TYPE_UD;
    emit(MOV(time, src_reg(value)));
index 2228d47..8d332af 100644 (file)
@@ -85,9 +85,7 @@ public:
                 gl_shader_stage stage,
                void *mem_ctx,
                 bool no_spills,
-                shader_time_shader_type st_base,
-                shader_time_shader_type st_written,
-                shader_time_shader_type st_reset);
+                shader_time_shader_type st_type);
    ~vec4_visitor();
 
    dst_reg dst_null_f()
@@ -345,7 +343,7 @@ public:
 
    void emit_shader_time_begin();
    void emit_shader_time_end();
-   void emit_shader_time_write(enum shader_time_shader_type type,
+   void emit_shader_time_write(int shader_time_index, int shader_time_subindex,
                                src_reg value);
 
    void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
@@ -413,9 +411,7 @@ private:
     */
    const bool no_spills;
 
-   const shader_time_shader_type st_base;
-   const shader_time_shader_type st_written;
-   const shader_time_shader_type st_reset;
+   const shader_time_shader_type st_type;
 };
 
 
index b047aa1..d3754de 100644 (file)
@@ -41,8 +41,7 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
                                  bool no_spills)
    : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base,
                   &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
-                  no_spills,
-                  ST_GS, ST_GS_WRITTEN, ST_GS_RESET),
+                  no_spills, ST_GS),
      c(c)
 {
 }
index 669f769..5ae572b 100644 (file)
@@ -3688,9 +3688,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
                            gl_shader_stage stage,
                           void *mem_ctx,
                            bool no_spills,
-                           shader_time_shader_type st_base,
-                           shader_time_shader_type st_written,
-                           shader_time_shader_type st_reset)
+                           shader_time_shader_type st_type)
    : backend_shader(brw, mem_ctx, shader_prog, prog, &prog_data->base, stage),
      c(c),
      key(key),
@@ -3700,9 +3698,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
      first_non_payload_grf(0),
      need_all_constants_in_pull_buffer(false),
      no_spills(no_spills),
-     st_base(st_base),
-     st_written(st_written),
-     st_reset(st_reset)
+     st_type(st_type)
 {
    this->failed = false;
 
index 4baf73e..731176a 100644 (file)
@@ -221,7 +221,7 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
                   &vs_compile->key.base, &vs_prog_data->base, prog,
                   MESA_SHADER_VERTEX,
                   mem_ctx, false /* no_spills */,
-                  ST_VS, ST_VS_WRITTEN, ST_VS_RESET),
+                  ST_VS),
      vs_compile(vs_compile),
      vs_prog_data(vs_prog_data)
 {
index 2ef52e9..8a86736 100644 (file)
@@ -48,8 +48,7 @@ public:
                                   struct gl_shader_program *shader_prog)
       : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
                      MESA_SHADER_VERTEX, NULL,
-                     false /* no_spills */,
-                     ST_NONE, ST_NONE, ST_NONE)
+                     false /* no_spills */, ST_NONE)
    {
    }
 
index c8c6757..87ebdfa 100644 (file)
@@ -51,8 +51,7 @@ public:
                                   struct gl_shader_program *shader_prog)
       : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
                      MESA_SHADER_VERTEX, NULL,
-                     false /* no_spills */,
-                     ST_NONE, ST_NONE, ST_NONE)
+                     false /* no_spills */, ST_NONE)
    {
    }