radv: replace RADV_TRACE_FILE by RADV_DEBUG=hang
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 19 Oct 2020 16:37:26 +0000 (18:37 +0200)
committer Marge Bot <eric+marge@anholt.net>
Fri, 23 Oct 2020 07:35:00 +0000 (07:35 +0000)
The trace file will be dumped as part of the hang report into
$HOME/radv_dumps_<pid>/trace.log if a GPU hang is detected.

The old and famous RADV_TRACE_FILE envvar is now deprecated.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7233>

docs/envvars.rst
src/amd/vulkan/radv_debug.c
src/amd/vulkan/radv_debug.h
src/amd/vulkan/radv_device.c

index b325746..672b565 100644 (file)
@@ -554,6 +554,9 @@ RADV driver environment variables
    ``forcecompress``
       Enables DCC,FMASK,CMASK,HTILE in situations where the driver supports it
       but normally does not deem it beneficial.
+   ``hang``
+      enable GPU hang detection and dump a report to $HOME/radv_dumps_<pid>
+      if a GPU hang is detected
    ``info``
       show GPU-related information
    ``metashaders``
@@ -624,8 +627,6 @@ RADV driver environment variables
 
 ``RADV_TEX_ANISO``
    force anisotropy filter (up to 16)
-``RADV_TRACE_FILE``
-   generate cmdbuffer tracefiles when a GPU hang is detected
 ``ACO_DEBUG``
    a comma-separated list of named flags, which do various things:
 
index c5ab538..7f80381 100644 (file)
@@ -83,19 +83,10 @@ radv_init_trace(struct radv_device *device)
 }
 
 static void
-radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs)
+radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
 {
-       const char *filename = getenv("RADV_TRACE_FILE");
-       FILE *f = fopen(filename, "w");
-
-       if (!f) {
-               fprintf(stderr, "Failed to write trace dump to %s\n", filename);
-               return;
-       }
-
        fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
        device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
-       fclose(f);
 }
 
 static void
@@ -625,8 +616,6 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
 
        fprintf(stderr, "radv: GPU hang detected...\n");
 
-       radv_dump_trace(queue->device, cs);
-
        /* Create a directory into $HOME/radv_dumps_<pid> to save various
         * debugging info about that GPU hang.
         */
@@ -638,6 +627,14 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
                abort();
        }
 
+       /* Dump trace file. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_trace(queue->device, cs, f);
+               fclose(f);
+       }
+
        /* Dump pipeline state. */
        snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
        f = fopen(dump_path, "w+");
index 1037164..0985be3 100644 (file)
@@ -57,6 +57,7 @@ enum {
        RADV_DEBUG_DISCARD_TO_DEMOTE = 1 << 26,
        RADV_DEBUG_LLVM              = 1 << 27,
        RADV_DEBUG_FORCE_COMPRESS    = 1 << 28,
+       RADV_DEBUG_HANG              = 1 << 29,
 };
 
 enum {
index c29006f..ce58e99 100644 (file)
@@ -530,6 +530,7 @@ static const struct debug_control radv_debug_options[] = {
        {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
        {"llvm", RADV_DEBUG_LLVM},
        {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
+       {"hang", RADV_DEBUG_HANG},
        {NULL, 0}
 };
 
@@ -2794,19 +2795,25 @@ VkResult radv_CreateDevice(
                device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
 
        if (getenv("RADV_TRACE_FILE")) {
-               const char *filename = getenv("RADV_TRACE_FILE");
+               fprintf(stderr, "***********************************************************************************\n");
+               fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
+               fprintf(stderr, "***********************************************************************************\n");
+               abort();
+       }
 
+       if (device->instance->debug_flags & RADV_DEBUG_HANG) {
+               /* Enable GPU hangs detection and dump logs if a GPU hang is
+                * detected.
+                */
                keep_shader_info = true;
 
                if (!radv_init_trace(device))
                        goto fail;
 
                fprintf(stderr, "*****************************************************************************\n");
-               fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
+               fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
                fprintf(stderr, "*****************************************************************************\n");
 
-               fprintf(stderr, "Trace file will be dumped to %s\n", filename);
-
                /* Wait for idle after every draw/dispatch to identify the
                 * first bad call.
                 */