freedreno: register usage queries
author Rob Clark <robdclark@gmail.com>
Mon, 25 Jun 2018 12:47:55 +0000 (08:47 -0400)
committer Rob Clark <robdclark@gmail.com>
Wed, 18 Jul 2018 14:10:44 +0000 (10:10 -0400)
Avg number of (half) regs per draw, so we can correlate fps dips to
shader register usage.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a4xx/fd4_draw.c
src/gallium/drivers/freedreno/a5xx/fd5_draw.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_query.c
src/gallium/drivers/freedreno/freedreno_query.h
src/gallium/drivers/freedreno/freedreno_query_sw.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index 761f25b..1ad6955 100644 (file)
@@ -149,12 +149,17 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
        fixup_shader_state(ctx, &emit.key);
 
        unsigned dirty = ctx->dirty;
+       const struct ir3_shader_variant *vp = fd3_emit_get_vp(&emit);
+       const struct ir3_shader_variant *fp = fd3_emit_get_fp(&emit);
 
        /* do regular pass first, since that is more likely to fail compiling: */
 
-       if (!(fd3_emit_get_vp(&emit) && fd3_emit_get_fp(&emit)))
+       if (!vp || !fp)
                return false;
 
+       ctx->stats.vs_regs += ir3_shader_halfregs(vp);
+       ctx->stats.fs_regs += ir3_shader_halfregs(fp);
+
        emit.key.binning_pass = false;
        emit.dirty = dirty;
        draw_impl(ctx, ctx->batch->draw, &emit, index_offset);
index 840e917..1c04a82 100644 (file)
@@ -135,12 +135,17 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
        fixup_shader_state(ctx, &emit.key);
 
        enum fd_dirty_3d_state dirty = ctx->dirty;
+       const struct ir3_shader_variant *vp = fd4_emit_get_vp(&emit);
+       const struct ir3_shader_variant *fp = fd4_emit_get_fp(&emit);
 
        /* do regular pass first, since that is more likely to fail compiling: */
 
-       if (!(fd4_emit_get_vp(&emit) && fd4_emit_get_fp(&emit)))
+       if (!vp || !fp)
                return false;
 
+       ctx->stats.vs_regs += ir3_shader_halfregs(vp);
+       ctx->stats.fs_regs += ir3_shader_halfregs(fp);
+
        emit.key.binning_pass = false;
        emit.dirty = dirty;
 
index 56525e0..9fae44a 100644 (file)
@@ -134,6 +134,9 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
        if (!vp || !fp)
                return false;
 
+       ctx->stats.vs_regs += ir3_shader_halfregs(vp);
+       ctx->stats.fs_regs += ir3_shader_halfregs(fp);
+
        /* figure out whether we need to disable LRZ write for binning
         * pass using draw pass's fp:
         */
index b81cbd0..bc7ff61 100644 (file)
@@ -219,6 +219,7 @@ struct fd_context {
                uint64_t draw_calls;
                uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw, batch_restore;
                uint64_t staging_uploads, shadow_uploads;
+               uint64_t vs_regs, fs_regs;
        } stats;
 
        /* Current batch.. the rule here is that you can deref ctx->batch
index 2a809a3..9d4637a 100644 (file)
@@ -118,29 +118,45 @@ fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
        ctx->cond_mode = mode;
 }
 
+#define _Q(_name, _query_type, _type, _result_type) {                \
+       .name        = _name,                                            \
+       .query_type  = _query_type,                                      \
+       .type        = PIPE_DRIVER_QUERY_TYPE_ ## _type,                 \
+       .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type,   \
+       .group_id    = ~(unsigned)0,                                     \
+}
+
+#define FQ(_name, _query_type, _type, _result_type) \
+       _Q(_name, FD_QUERY_ ## _query_type, _type, _result_type)
+
+#define PQ(_name, _query_type, _type, _result_type) \
+       _Q(_name, PIPE_QUERY_ ## _query_type, _type, _result_type)
+
+static const struct pipe_driver_query_info sw_query_list[] = {
+       FQ("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
+       FQ("batches", BATCH_TOTAL, UINT64, AVERAGE),
+       FQ("batches-sysmem", BATCH_SYSMEM, UINT64, AVERAGE),
+       FQ("batches-gmem", BATCH_GMEM, UINT64, AVERAGE),
+       FQ("batches-nondraw", BATCH_NONDRAW, UINT64, AVERAGE),
+       FQ("restores", BATCH_RESTORE, UINT64, AVERAGE),
+       PQ("prims-emitted", PRIMITIVES_EMITTED, UINT64, AVERAGE),
+       FQ("staging", STAGING_UPLOADS, UINT64, AVERAGE),
+       FQ("shadow", SHADOW_UPLOADS, UINT64, AVERAGE),
+       FQ("vsregs", VS_REGS, FLOAT, AVERAGE),
+       FQ("fsregs", FS_REGS, FLOAT, AVERAGE),
+};
+
 static int
 fd_get_driver_query_info(struct pipe_screen *pscreen,
                unsigned index, struct pipe_driver_query_info *info)
 {
-       struct pipe_driver_query_info list[] = {
-                       {"draw-calls", FD_QUERY_DRAW_CALLS, {0}},
-                       {"batches", FD_QUERY_BATCH_TOTAL, {0}},
-                       {"batches-sysmem", FD_QUERY_BATCH_SYSMEM, {0}},
-                       {"batches-gmem", FD_QUERY_BATCH_GMEM, {0}},
-                       {"batches-nondraw", FD_QUERY_BATCH_NONDRAW, {0}},
-                       {"restores", FD_QUERY_BATCH_RESTORE, {0}},
-                       {"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, {0}},
-                       {"staging", FD_QUERY_STAGING_UPLOADS, {0}},
-                       {"shadow", FD_QUERY_SHADOW_UPLOADS, {0}},
-       };
-
        if (!info)
-               return ARRAY_SIZE(list);
+               return ARRAY_SIZE(sw_query_list);
 
-       if (index >= ARRAY_SIZE(list))
+       if (index >= ARRAY_SIZE(sw_query_list))
                return 0;
 
-       *info = list[index];
+       *info = sw_query_list[index];
        return 1;
 }
 
index 296c356..8150d56 100644 (file)
@@ -64,6 +64,8 @@ fd_query(struct pipe_query *pq)
 #define FD_QUERY_BATCH_RESTORE   (PIPE_QUERY_DRIVER_SPECIFIC + 5)  /* batches requiring GMEM restore */
 #define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6)  /* texture/buffer uploads using staging blit */
 #define FD_QUERY_SHADOW_UPLOADS  (PIPE_QUERY_DRIVER_SPECIFIC + 7)  /* texture/buffer uploads that shadowed rsc */
+#define FD_QUERY_VS_REGS         (PIPE_QUERY_DRIVER_SPECIFIC + 8)  /* avg # of VS (half) registers per draw */
+#define FD_QUERY_FS_REGS         (PIPE_QUERY_DRIVER_SPECIFIC + 9)  /* avg # of FS (half) registers per draw */
 
 void fd_query_screen_init(struct pipe_screen *pscreen);
 void fd_query_context_init(struct pipe_context *pctx);
index 080b2b1..13ab0e8 100644 (file)
@@ -73,12 +73,16 @@ read_counter(struct fd_context *ctx, int type)
                return ctx->stats.staging_uploads;
        case FD_QUERY_SHADOW_UPLOADS:
                return ctx->stats.shadow_uploads;
+       case FD_QUERY_VS_REGS:
+               return ctx->stats.vs_regs;
+       case FD_QUERY_FS_REGS:
+               return ctx->stats.fs_regs;
        }
        return 0;
 }
 
 static bool
-is_rate_query(struct fd_query *q)
+is_time_rate_query(struct fd_query *q)
 {
        switch (q->type) {
        case FD_QUERY_BATCH_TOTAL:
@@ -94,14 +98,29 @@ is_rate_query(struct fd_query *q)
        }
 }
 
+static bool
+is_draw_rate_query(struct fd_query *q)
+{
+       switch (q->type) {
+       case FD_QUERY_VS_REGS:
+       case FD_QUERY_FS_REGS:
+               return true;
+       default:
+               return false;
+       }
+}
+
 static boolean
 fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
 {
        struct fd_sw_query *sq = fd_sw_query(q);
        sq->begin_value = read_counter(ctx, q->type);
-       if (is_rate_query(q))
+       if (is_time_rate_query(q)) {
                sq->begin_time = os_time_get();
-   return true;
+       } else if (is_draw_rate_query(q)) {
+               sq->begin_time = ctx->stats.draw_calls;
+       }
+       return true;
 }
 
 static void
@@ -109,8 +128,11 @@ fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
 {
        struct fd_sw_query *sq = fd_sw_query(q);
        sq->end_value = read_counter(ctx, q->type);
-       if (is_rate_query(q))
+       if (is_time_rate_query(q)) {
                sq->end_time = os_time_get();
+       } else if (is_draw_rate_query(q)) {
+               sq->end_time = ctx->stats.draw_calls;
+       }
 }
 
 static boolean
@@ -121,10 +143,14 @@ fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 
        result->u64 = sq->end_value - sq->begin_value;
 
-       if (is_rate_query(q)) {
+       if (is_time_rate_query(q)) {
                double fps = (result->u64 * 1000000) /
                                (double)(sq->end_time - sq->begin_time);
                result->u64 = (uint64_t)fps;
+       } else if (is_draw_rate_query(q)) {
+               double avg = ((double)result->u64) /
+                               (double)(sq->end_time - sq->begin_time);
+               result->f = avg;
        }
 
        return true;
@@ -154,6 +180,8 @@ fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
        case FD_QUERY_BATCH_RESTORE:
        case FD_QUERY_STAGING_UPLOADS:
        case FD_QUERY_SHADOW_UPLOADS:
+       case FD_QUERY_VS_REGS:
+       case FD_QUERY_FS_REGS:
                break;
        default:
                return NULL;
index acbeed5..dba98b9 100644 (file)
@@ -527,4 +527,13 @@ ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
        return regid(63, 0);
 }
 
+/* calculate register footprint in terms of half-regs (ie. one full
+ * reg counts as two half-regs).
+ */
+static inline uint32_t
+ir3_shader_halfregs(const struct ir3_shader_variant *v)
+{
+       return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1);
+}
+
 #endif /* IR3_SHADER_H_ */