radv: dump GPU hang report logs into $HOME/radv_dumps_<pid>
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 19 Oct 2020 15:56:14 +0000 (17:56 +0200)
committerMarge Bot <eric+marge@anholt.net>
Fri, 23 Oct 2020 07:35:00 +0000 (07:35 +0000)
This creates a directory and save various logs (dmesg, umr,
pipeline, gpu info, etc) instead of printing stuff to stdout/stderr.

This dumps the following files when a GPU hang is detected:
- dmesg.log
- gpu_info.lo
- options.log
- pipeline.log (shaders including SPIR-V if spirv-dis found)
- registers.log
- trace.log
- vm_fault (if a VM fault is detected)
- umr_ring.log (if UMR found)
- umr_waves.log (if UMR found)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7233>

src/amd/vulkan/radv_debug.c

index fc9dc90..48727f4 100644 (file)
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/utsname.h>
+#include <sys/stat.h>
 
 #include "util/mesa-sha1.h"
 #include "sid.h"
@@ -44,6 +45,8 @@
 #define COLOR_YELLOW   "\033[1;33m"
 #define COLOR_CYAN     "\033[1;36m"
 
+#define RADV_DUMP_DIR "radv_dumps"
+
 /* Trace BO layout (offsets are 4 bytes):
  *
  * [0]: primary trace ID
@@ -605,8 +608,10 @@ void
 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
 {
        struct radv_device *device = queue->device;
+       char dump_dir[256], dump_path[512];
        enum ring_type ring;
        uint64_t addr;
+       FILE *f;
 
        ring = radv_queue_family_to_ring(queue->queue_family_index);
 
@@ -618,25 +623,90 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
        if (!hang_occurred && !vm_fault_occurred)
                return;
 
-       radv_dump_trace(queue->device, cs);
+       fprintf(stderr, "radv: GPU hang detected...\n");
 
-       fprintf(stderr, "GPU hang report:\n\n");
-       radv_dump_device_name(device, stderr);
-       ac_print_gpu_info(&device->physical_device->rad_info, stdout);
+       radv_dump_trace(queue->device, cs);
 
-       radv_dump_enabled_options(device, stderr);
-       radv_dump_dmesg(stderr);
-       radv_dump_umr_ring(queue, stderr);
-       radv_dump_umr_waves(queue, stderr);
+       /* Create a directory into $HOME/radv_dumps_<pid> to save various
+        * debugging info about that GPU hang.
+        */
+       snprintf(dump_dir, sizeof(dump_dir), "%s/"RADV_DUMP_DIR"_%d",
+                debug_get_option("HOME", "."), getpid());
+       if (mkdir(dump_dir, 0774) && errno != EEXIST) {
+               fprintf(stderr, "radv: can't create directory '%s' (%i).\n",
+                       dump_dir, errno);
+               abort();
+       }
 
+       /* Dump VM fault info. */
        if (vm_fault_occurred) {
-               fprintf(stderr, "VM fault report.\n\n");
-               fprintf(stderr, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
+               snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
+               f = fopen(dump_path, "w+");
+               if (f) {
+                       fprintf(f, "VM fault report.\n\n");
+                       fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
+                       fclose(f);
+               }
+       }
+
+       /* Dump enabled debug/perftest options. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "options.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_enabled_options(device, f);
+               fclose(f);
+       }
+
+       /* Dump GPU info. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_device_name(device, f);
+               ac_print_gpu_info(&device->physical_device->rad_info, f);
+               fclose(f);
        }
 
-       radv_dump_debug_registers(device, stderr);
-       radv_dump_queue_state(queue, stderr);
+       /* Dump dmesg. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_dmesg(f);
+               fclose(f);
+       }
+
+       /* Dump UMR ring. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_umr_ring(queue, f);
+               fclose(f);
+       }
+
+       /* Dump UMR waves. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_umr_waves(queue, f);
+               fclose(f);
+       }
+
+       /* Dump debug registers. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_debug_registers(device, f);
+               fclose(f);
+       }
+
+       /* Dump pipeline state. */
+       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
+       f = fopen(dump_path, "w+");
+       if (f) {
+               radv_dump_queue_state(queue, f);
+               fclose(f);
+       }
 
+       fprintf(stderr, "radv: GPU hang report saved to '%s'!\n", dump_dir);
        abort();
 }