From e8ca1a53a625544ea30b394be905ff7e51d78af6 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 19 Dec 2012 12:23:50 -0500 Subject: [PATCH] r600g: add cs tracing infrastructure for lockup pinpointing It's a build-time option: set R600_TRACE_CS to 1 and it will print every CS to stderr, along with the CS trace point value, which gives the last offset into a CS processed by the GPU. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_hw_context.c | 41 +++++++++++++++++++++++++ src/gallium/drivers/r600/r600_hw_context_priv.h | 5 +-- src/gallium/drivers/r600/r600_pipe.c | 20 ++++++++++++ src/gallium/drivers/r600/r600_pipe.h | 16 ++++++++++ src/gallium/drivers/r600/r600_state_common.c | 26 ++++++++++++++++ 5 files changed, 106 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index cdd31a4..6c8cb9d 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -27,6 +27,7 @@ #include "r600d.h" #include "util/u_memory.h" #include <errno.h> +#include <unistd.h> /* Get backends mask */ void r600_get_backend_mask(struct r600_context *ctx) @@ -369,6 +370,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, for (i = 0; i < R600_NUM_ATOMS; i++) { if (ctx->atoms[i] && ctx->atoms[i]->dirty) { num_dw += ctx->atoms[i]->num_dw; +#if R600_TRACE_CS + if (ctx->screen->trace_bo) { + num_dw += R600_TRACE_CS_DWORDS; + } +#endif } } @@ -376,6 +382,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, /* The upper-bound of how much space a draw command would take. */ num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS; +#if R600_TRACE_CS + if (ctx->screen->trace_bo) { + num_dw += R600_TRACE_CS_DWORDS; + } +#endif } /* Count in queries_suspend. */ @@ -717,7 +728,37 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) } /* Flush the CS. 
*/ +#if R600_TRACE_CS + if (ctx->screen->trace_bo) { + struct r600_screen *rscreen = ctx->screen; + unsigned i; + + for (i = 0; i < cs->cdw; i++) { + fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]); + } + rscreen->cs_count++; + } +#endif ctx->ws->cs_flush(ctx->cs, flags); +#if R600_TRACE_CS + if (ctx->screen->trace_bo) { + struct r600_screen *rscreen = ctx->screen; + unsigned i; + + for (i = 0; i < 10; i++) { + usleep(5); + if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) { + break; + } + } + if (i == 10) { + fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n", + rscreen->trace_ptr[1], rscreen->trace_ptr[0]); + } else { + fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5); + } + } +#endif r600_begin_new_cs(ctx); } diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h index 050c472..692e6ec 100644 --- a/src/gallium/drivers/r600/r600_hw_context_priv.h +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h @@ -29,8 +29,9 @@ #include "r600_pipe.h" /* the number of CS dwords for flushing and drawing */ -#define R600_MAX_FLUSH_CS_DWORDS 12 -#define R600_MAX_DRAW_CS_DWORDS 34 +#define R600_MAX_FLUSH_CS_DWORDS 12 +#define R600_MAX_DRAW_CS_DWORDS 34 +#define R600_TRACE_CS_DWORDS 7 /* these flags are used in register flags and added into block flags */ #define REG_FLAG_NEED_BO 1 diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 290aa51..b02f875 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -723,6 +723,12 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf); pipe_resource_reference((struct pipe_resource**)&rscreen->fences.bo, NULL); } +#if R600_TRACE_CS + if (rscreen->trace_bo) { + rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf); + pipe_resource_reference((struct 
pipe_resource**)&rscreen->trace_bo, NULL); + } +#endif pipe_mutex_destroy(rscreen->fences.mutex); rscreen->ws->destroy(rscreen->ws); @@ -1042,5 +1048,19 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen->global_pool = compute_memory_pool_new(rscreen); +#if R600_TRACE_CS + rscreen->cs_count = 0; + if (rscreen->info.drm_minor >= 28) { + rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen, + PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, + 4096); + if (rscreen->trace_bo) { + rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL, + PIPE_TRANSFER_UNSYNCHRONIZED); + } + } +#endif + return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 515174a..8418737 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -37,6 +37,8 @@ #define R600_NUM_ATOMS 37 +#define R600_TRACE_CS 0 + #define R600_MAX_USER_CONST_BUFFERS 1 #define R600_MAX_DRIVER_CONST_BUFFERS 2 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) @@ -235,6 +237,11 @@ struct r600_screen { * XXX: Not sure if this is the best place for global_pool. Also, * it's not thread safe, so it won't work with multiple contexts. 
*/ struct compute_memory_pool *global_pool; +#if R600_TRACE_CS + struct r600_resource *trace_bo; + uint32_t *trace_ptr; + unsigned cs_count; +#endif }; struct r600_pipe_sampler_view { @@ -533,10 +540,19 @@ static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs, cs->cdw += cb->num_dw; } +#if R600_TRACE_CS +void r600_trace_emit(struct r600_context *rctx); +#endif + static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) { atom->emit(rctx, atom); atom->dirty = false; +#if R600_TRACE_CS + if (rctx->screen->trace_bo) { + r600_trace_emit(rctx); + } +#endif } static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c69149b..f8b64cc 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1328,6 +1328,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0); } +#if R600_TRACE_CS + if (rctx->screen->trace_bo) { + r600_trace_emit(rctx); + } +#endif + /* Set the depth buffer as dirty. 
*/ if (rctx->framebuffer.state.zsbuf) { struct pipe_surface *surf = rctx->framebuffer.state.zsbuf; @@ -1620,3 +1626,23 @@ void r600_init_common_state_functions(struct r600_context *rctx) rctx->context.set_stream_output_targets = r600_set_so_targets; rctx->context.draw_vbo = r600_draw_vbo; } + +#if R600_TRACE_CS +void r600_trace_emit(struct r600_context *rctx) +{ + struct r600_screen *rscreen = rctx->screen; + struct radeon_winsys_cs *cs = rctx->cs; + uint64_t va; + uint32_t reloc; + + va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo); + reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE); + r600_write_value(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); + r600_write_value(cs, va & 0xFFFFFFFFUL); + r600_write_value(cs, (va >> 32UL) & 0xFFUL); + r600_write_value(cs, cs->cdw); + r600_write_value(cs, rscreen->cs_count); + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); + r600_write_value(cs, reloc); +} +#endif -- 2.7.4