radeonsi: add HUD queries for cache flush stats
author Marek Olšák <marek.olsak@amd.com>
Sun, 25 Dec 2016 18:48:55 +0000 (19:48 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 6 Jan 2017 20:05:48 +0000 (21:05 +0100)
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_query.c
src/gallium/drivers/radeon/r600_query.h
src/gallium/drivers/radeonsi/si_state_draw.c

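diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 2e06556..74f86dc 100644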
@@ -574,6 +574,9 @@ struct r600_common_context {
        unsigned                        num_vs_flushes;
        unsigned                        num_ps_flushes;
        unsigned                        num_cs_flushes;
+       unsigned                        num_fb_cache_flushes;
+       unsigned                        num_L2_invalidates;
+       unsigned                        num_L2_writebacks;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */
 
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 0e4270a..6b93329 100644
@@ -113,6 +113,15 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
        case R600_QUERY_NUM_CS_FLUSHES:
                query->begin_result = rctx->num_cs_flushes;
                break;
+       case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+               query->begin_result = rctx->num_fb_cache_flushes;
+               break;
+       case R600_QUERY_NUM_L2_INVALIDATES:
+               query->begin_result = rctx->num_L2_invalidates;
+               break;
+       case R600_QUERY_NUM_L2_WRITEBACKS:
+               query->begin_result = rctx->num_L2_writebacks;
+               break;
        case R600_QUERY_REQUESTED_VRAM:
        case R600_QUERY_REQUESTED_GTT:
        case R600_QUERY_MAPPED_VRAM:
@@ -197,6 +206,15 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
        case R600_QUERY_NUM_CS_FLUSHES:
                query->end_result = rctx->num_cs_flushes;
                break;
+       case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+               query->end_result = rctx->num_fb_cache_flushes;
+               break;
+       case R600_QUERY_NUM_L2_INVALIDATES:
+               query->end_result = rctx->num_L2_invalidates;
+               break;
+       case R600_QUERY_NUM_L2_WRITEBACKS:
+               query->end_result = rctx->num_L2_writebacks;
+               break;
        case R600_QUERY_REQUESTED_VRAM:
        case R600_QUERY_REQUESTED_GTT:
        case R600_QUERY_MAPPED_VRAM:
@@ -1665,6 +1683,9 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
        X("num-vs-flushes",             NUM_VS_FLUSHES,         UINT64, AVERAGE),
        X("num-ps-flushes",             NUM_PS_FLUSHES,         UINT64, AVERAGE),
        X("num-cs-flushes",             NUM_CS_FLUSHES,         UINT64, AVERAGE),
+       X("num-fb-cache-flushes",       NUM_FB_CACHE_FLUSHES,   UINT64, AVERAGE),
+       X("num-L2-invalidates",         NUM_L2_INVALIDATES,     UINT64, AVERAGE),
+       X("num-L2-writebacks",          NUM_L2_WRITEBACKS,      UINT64, AVERAGE),
        X("requested-VRAM",             REQUESTED_VRAM,         BYTES, AVERAGE),
        X("requested-GTT",              REQUESTED_GTT,          BYTES, AVERAGE),
        X("mapped-VRAM",                MAPPED_VRAM,            BYTES, AVERAGE),
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 2ff586a..af434fa 100644
@@ -52,6 +52,9 @@ enum {
        R600_QUERY_NUM_VS_FLUSHES,
        R600_QUERY_NUM_PS_FLUSHES,
        R600_QUERY_NUM_CS_FLUSHES,
+       R600_QUERY_NUM_FB_CACHE_FLUSHES,
+       R600_QUERY_NUM_L2_INVALIDATES,
+       R600_QUERY_NUM_L2_WRITEBACKS,
        R600_QUERY_REQUESTED_VRAM,
        R600_QUERY_REQUESTED_GTT,
        R600_QUERY_MAPPED_VRAM,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cae19dc..b3f664e 100644
@@ -735,6 +735,9 @@ void si_emit_cache_flush(struct si_context *sctx)
        struct radeon_winsys_cs *cs = rctx->gfx.cs;
        uint32_t cp_coher_cntl = 0;
 
+       if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER)
+               sctx->b.num_fb_cache_flushes++;
+
        /* SI has a bug that it always flushes ICACHE and KCACHE if either
         * bit is set. An alternative way is to write SQC_CACHES, but that
         * doesn't seem to work reliably. Since the bug doesn't affect
@@ -852,6 +855,7 @@ void si_emit_cache_flush(struct si_context *sctx)
                                     S_0085F0_TC_ACTION_ENA(1) |
                                     S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
                cp_coher_cntl = 0;
+               sctx->b.num_L2_invalidates++;
        } else {
                /* L1 invalidation and L2 writeback must be done separately,
                 * because both operations can't be done together.
@@ -867,6 +871,7 @@ void si_emit_cache_flush(struct si_context *sctx)
                                             S_0301F0_TC_WB_ACTION_ENA(1) |
                                             S_0301F0_TC_NC_ACTION_ENA(1));
                        cp_coher_cntl = 0;
+                       sctx->b.num_L2_writebacks++;
                }
                if (rctx->flags & SI_CONTEXT_INV_VMEM_L1) {
                        /* Invalidate per-CU VMEM L1. */