From 33c9d4bf3152cdfff694a1572012631b120b6731 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 19 Oct 2020 18:37:26 +0200 Subject: [PATCH] radv: replace RADV_TRACE_FILE by RADV_DEBUG=hang The trace file will be dumped as part of the hang report into $HOME/radv_dumps_<pid>/trace.log if a GPU hang is detected. The old and famous RADV_TRACE_FILE envvar is now deprecated. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- docs/envvars.rst | 5 +++-- src/amd/vulkan/radv_debug.c | 21 +++++++++------------ src/amd/vulkan/radv_debug.h | 1 + src/amd/vulkan/radv_device.c | 15 +++++++++++---- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index b325746..672b565 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -554,6 +554,9 @@ RADV driver environment variables ``forcecompress`` Enables DCC,FMASK,CMASK,HTILE in situations where the driver supports it but normally does not deem it beneficial. + ``hang`` + enable GPU hangs detection and dump a report to $HOME/radv_dumps_<pid> + if a GPU hang is detected ``info`` show GPU-related information ``metashaders`` @@ -624,8 +627,6 @@ RADV driver environment variables ``RADV_TEX_ANISO`` force anisotropy filter (up to 16) -``RADV_TRACE_FILE`` - generate cmdbuffer tracefiles when a GPU hang is detected ``ACO_DEBUG`` a comma-separated list of named flags, which do various things: diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index c5ab538..7f80381 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -83,19 +83,10 @@ radv_init_trace(struct radv_device *device) } static void -radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs) +radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f) { - const char *filename = getenv("RADV_TRACE_FILE"); - FILE *f = fopen(filename, "w"); - - if (!f) { - fprintf(stderr, "Failed to write trace dump to %s\n", filename); - return; - } - 
fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2); - fclose(f); } static void @@ -625,8 +616,6 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) fprintf(stderr, "radv: GPU hang detected...\n"); - radv_dump_trace(queue->device, cs); - /* Create a directory into $HOME/radv_dumps_<pid> to save various * debugging info about that GPU hang. */ @@ -638,6 +627,14 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) abort(); } + /* Dump trace file. */ + snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log"); + f = fopen(dump_path, "w+"); + if (f) { + radv_dump_trace(queue->device, cs, f); + fclose(f); + } + /* Dump pipeline state. */ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log"); f = fopen(dump_path, "w+"); diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 1037164..0985be3 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -57,6 +57,7 @@ enum { RADV_DEBUG_DISCARD_TO_DEMOTE = 1 << 26, RADV_DEBUG_LLVM = 1 << 27, RADV_DEBUG_FORCE_COMPRESS = 1 << 28, + RADV_DEBUG_HANG = 1 << 29, }; enum { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index c29006f..ce58e99 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -530,6 +530,7 @@ static const struct debug_control radv_debug_options[] = { {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE}, {"llvm", RADV_DEBUG_LLVM}, {"forcecompress", RADV_DEBUG_FORCE_COMPRESS}, + {"hang", RADV_DEBUG_HANG}, {NULL, 0} }; @@ -2794,19 +2795,25 @@ VkResult radv_CreateDevice( device->physical_device->rad_info.family == CHIP_HAWAII ? 
4096 : 8192; if (getenv("RADV_TRACE_FILE")) { - const char *filename = getenv("RADV_TRACE_FILE"); + fprintf(stderr, "***********************************************************************************\n"); + fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n"); + fprintf(stderr, "***********************************************************************************\n"); + abort(); + } + if (device->instance->debug_flags & RADV_DEBUG_HANG) { + /* Enable GPU hangs detection and dump logs if a GPU hang is + * detected. + */ keep_shader_info = true; if (!radv_init_trace(device)) goto fail; fprintf(stderr, "*****************************************************************************\n"); - fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n"); + fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n"); fprintf(stderr, "*****************************************************************************\n"); - fprintf(stderr, "Trace file will be dumped to %s\n", filename); - /* Wait for idle after every draw/dispatch to identify the * first bad call. */ -- 2.7.4