freedreno: Add 'replay' tool which allows to replay cmdstreams
authorDanylo Piliaiev <dpiliaiev@igalia.com>
Wed, 19 Oct 2022 16:47:36 +0000 (18:47 +0200)
committerMarge Bot <emma+marge@anholt.net>
Tue, 25 Oct 2022 20:26:49 +0000 (20:26 +0000)
Replay command stream obtained from:
- /sys/kernel/debug/dri/0/rd
- /sys/kernel/debug/dri/0/hangrd
!!! Command stream capture should be done with ALL buffers:
- echo 1 > /sys/module/msm/parameters/rd_full

Requires kernel with MSM_INFO_SET_IOVA support.

This tool is intended for reproduction of various GPU issues:
- GPU hangs, note that command stream obtained from hangrd
  may not reproduce (rarely) the hang, since the buffers are
  snapshotted at the moment of the hang and not at the start
  of the hanging command stream.
- TODO: Misrendering, would require marking framebuffer images
  at each renderpass in order to fetch and decode them.

Code from Freedreno/Turnip is not re-used here since the relevant
pieces may introduce additional allocations which cannot be allowed
during the replay.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19171>

src/freedreno/decode/meson.build
src/freedreno/decode/replay.c [new file with mode: 0644]

index 9590ee7..fd8e381 100644 (file)
@@ -69,6 +69,29 @@ if dep_libarchive.found()
     ],
     build_by_default: false,
   )
+
+  replay = executable(
+    'replay',
+    [
+      'replay.c'
+    ],
+    include_directories: [
+      inc_freedreno,
+      inc_include,
+      inc_src,
+    ],
+    c_args : [no_override_init_args],
+    gnu_symbol_visibility: 'hidden',
+    dependencies: [
+      dep_libdrm,
+    ],
+    link_with: [
+      libfreedreno_cffdec,
+      libfreedreno_io,
+    ],
+    build_by_default: with_tools.contains('freedreno'),
+    install: install_fd_decode_tools,
+  )
 endif
 
 if dep_lua.found() and dep_libarchive.found()
diff --git a/src/freedreno/decode/replay.c b/src/freedreno/decode/replay.c
new file mode 100644 (file)
index 0000000..5334235
--- /dev/null
@@ -0,0 +1,570 @@
+/*
+ * Copyright © 2022 Igalia S.L.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <xf86drm.h>
+#include "drm-uapi/msm_drm.h"
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "util/os_time.h"
+#include "util/rb_tree.h"
+#include "util/u_vector.h"
+#include "buffers.h"
+#include "cffdec.h"
+#include "io.h"
+#include "redump.h"
+
+/**
+ * Replay command stream obtained from:
+ * - /sys/kernel/debug/dri/0/rd
+ * - /sys/kernel/debug/dri/0/hangrd
+ * !!! Command stream capture should be done with ALL buffers:
+ * - echo 1 > /sys/module/msm/parameters/rd_full
+ *
+ * Requires kernel with MSM_INFO_SET_IOVA support.
+ *
+ * This tool is intended for reproduction of various GPU issues:
+ * - GPU hangs, note that command stream obtained from hangrd
+ *   may not reproduce (rarely) the hang, since the buffers are
+ *   snapshotted at the moment of the hang and not at the start
+ *   of the hanging command stream.
+ * - TODO: Misrendering, would require marking framebuffer images
+ *   at each renderpass in order to fetch and decode them.
+ *
+ * Code from Freedreno/Turnip is not re-used here since the relevant
+ * pieces may introduce additional allocations which cannot be allowed
+ * during the replay.
+ */
+
+static const char *exename = NULL;
+
+static int handle_file(const char *filename);
+
+static void
+print_usage(const char *name)
+{
+   /* clang-format off */
+   fprintf(stderr, "Usage:\n\n"
+           "\t%s [OPTSIONS]... FILE...\n\n"
+           "Options:\n"
+           "\t-e, --exe=NAME   - only use cmdstream from named process\n"
+           "\t-h, --help       - show this message\n"
+           , name);
+   /* clang-format on */
+   exit(2);
+}
+
+/* clang-format off */
+static const struct option opts[] = {
+      /* Long opts that simply set a flag (no corresponding short alias: */
+
+      /* Long opts with short alias: */
+      { "exe",       required_argument, 0, 'e' },
+      { "help",      no_argument,       0, 'h' },
+};
+/* clang-format on */
+
+int
+main(int argc, char **argv)
+{
+   int ret = -1;
+   int c;
+
+   while ((c = getopt_long(argc, argv, "e:h", opts, NULL)) != -1) {
+      switch (c) {
+      case 0:
+         /* option that set a flag, nothing to do */
+         break;
+      case 'e':
+         exename = optarg;
+         break;
+      case 'h':
+      default:
+         print_usage(argv[0]);
+      }
+   }
+
+   while (optind < argc) {
+      ret = handle_file(argv[optind]);
+      if (ret) {
+         fprintf(stderr, "error reading: %s\n", argv[optind]);
+         fprintf(stderr, "continuing..\n");
+      }
+      optind++;
+   }
+
+   if (ret)
+      print_usage(argv[0]);
+
+   return ret;
+}
+
+static void
+parse_addr(uint32_t *buf, int sz, unsigned int *len, uint64_t *gpuaddr)
+{
+   *gpuaddr = buf[0];
+   *len = buf[1];
+   if (sz > 8)
+      *gpuaddr |= ((uint64_t)(buf[2])) << 32;
+}
+
+struct buffer {
+   struct rb_node node;
+
+   uint32_t gem_handle;
+   uint64_t size;
+   uint64_t iova;
+   void *map;
+
+   bool used;
+   uint32_t flags;
+};
+
+struct cmdstream {
+   uint64_t iova;
+   uint64_t size;
+};
+
+struct device {
+   int fd;
+
+   struct rb_tree buffers;
+   struct u_vector cmdstreams;
+};
+
+void
+buffer_mem_free(struct device *dev, struct buffer *buf);
+
+static int
+rb_buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
+{
+   const struct buffer *buf1 = (const struct buffer *)n1;
+   const struct buffer *buf2 = (const struct buffer *)n2;
+   /* Note that gpuaddr comparisions can overflow an int: */
+   if (buf1->iova > buf2->iova)
+      return 1;
+   else if (buf1->iova < buf2->iova)
+      return -1;
+   return 0;
+}
+
+static int
+rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
+{
+   const struct buffer *buf = (const struct buffer *)node;
+   uint64_t iova = *(uint64_t *)addrptr;
+   if (buf->iova + buf->size <= iova)
+      return -1;
+   else if (buf->iova > iova)
+      return 1;
+   return 0;
+}
+
+static struct device *
+device_create()
+{
+   struct device *dev = calloc(sizeof(struct device), 1);
+
+   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
+   if (dev->fd < 0) {
+      errx(1, "Cannot open MSM fd!");
+   }
+
+   struct drm_msm_param req = {
+      .pipe = MSM_PIPE_3D0,
+      .param = MSM_PARAM_VA_START,
+   };
+
+   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req,
+                                 sizeof(req));
+
+   if (ret) {
+      err(1, "MSM_INFO_SET_IOVA is unsupported");
+   }
+
+   rb_tree_init(&dev->buffers);
+   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
+
+   return dev;
+}
+
+static struct buffer *
+device_get_buffer(struct device *dev, uint64_t iova)
+{
+   if (iova == 0)
+      return NULL;
+   return (struct buffer *)rb_tree_search(&dev->buffers, &iova,
+                                          rb_buffer_search_cmp);
+}
+
+static void
+device_mark_buffers(struct device *dev)
+{
+   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
+      buf->used = false;
+   }
+}
+
+static void
+device_free_unused_buffers(struct device *dev)
+{
+   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
+      if (!buf->used) {
+         buffer_mem_free(dev, buf);
+         rb_tree_remove(&dev->buffers, &buf->node);
+         free(buf);
+      }
+   }
+}
+
+static inline void
+get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
+{
+   struct timespec t;
+   clock_gettime(CLOCK_MONOTONIC, &t);
+   tv->tv_sec = t.tv_sec + ns / 1000000000;
+   tv->tv_nsec = t.tv_nsec + ns % 1000000000;
+}
+
+static void
+device_submit_cmdstreams(struct device *dev)
+{
+   device_free_unused_buffers(dev);
+   device_mark_buffers(dev);
+
+   if (!u_vector_length(&dev->cmdstreams))
+      return;
+
+   struct drm_msm_gem_submit_cmd cmds[u_vector_length(&dev->cmdstreams)];
+
+   uint32_t idx = 0;
+   struct cmdstream *cmd;
+   u_vector_foreach(cmd, &dev->cmdstreams) {
+      struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);
+      cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;
+
+      uint32_t bo_idx = 0;
+      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
+         if (buf == cmdstream_buf)
+            break;
+
+         bo_idx++;
+      }
+
+      struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
+      submit_cmd->type = MSM_SUBMIT_CMD_BUF;
+      submit_cmd->submit_idx = bo_idx;
+      submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
+      submit_cmd->size = cmd->size;
+      submit_cmd->pad = 0;
+      submit_cmd->nr_relocs = 0;
+      submit_cmd->relocs = 0;
+
+      idx++;
+   }
+
+   uint32_t bo_count = 0;
+   rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
+      if (buf)
+         bo_count++;
+   }
+
+   struct drm_msm_gem_submit_bo *bo_list =
+      calloc(sizeof(struct drm_msm_gem_submit_bo), bo_count);
+
+   uint32_t bo_idx = 0;
+   rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
+      struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
+      submit_bo->handle = buf->gem_handle;
+      submit_bo->flags = buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
+      submit_bo->presumed = buf->iova;
+
+      buf->flags = 0;
+   }
+
+   struct drm_msm_gem_submit submit_req = {
+      .flags = MSM_PIPE_3D0,
+      .queueid = 0,
+      .bos = (uint64_t)(uintptr_t)bo_list,
+      .nr_bos = bo_count,
+      .cmds = (uint64_t)(uintptr_t)cmds,
+      .nr_cmds = u_vector_length(&dev->cmdstreams),
+      .in_syncobjs = 0,
+      .out_syncobjs = 0,
+      .nr_in_syncobjs = 0,
+      .nr_out_syncobjs = 0,
+      .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
+   };
+
+   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_SUBMIT, &submit_req,
+                                 sizeof(submit_req));
+
+   if (ret) {
+      err(1, "DRM_MSM_GEM_SUBMIT failure %d", ret);
+   }
+
+   /* Wait for submission to complete in order to be sure that
+    * freeing buffers would free their VMAs in the kernel.
+    * Makes sure that new allocations won't clash with old ones.
+    */
+   struct drm_msm_wait_fence wait_req = {
+      .fence = submit_req.fence,
+      .queueid = 0,
+   };
+   get_abs_timeout(&wait_req.timeout, 1000000000);
+
+   ret =
+      drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &wait_req, sizeof(wait_req));
+   if (ret && (ret != -ETIMEDOUT)) {
+      err(1, "DRM_MSM_WAIT_FENCE failure %d", ret);
+   }
+
+   u_vector_finish(&dev->cmdstreams);
+   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
+}
+
+static void
+buffer_mem_alloc(struct device *dev, struct buffer *buf)
+{
+   {
+      struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};
+
+      int ret =
+         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req));
+      if (ret) {
+         err(1, "DRM_MSM_GEM_NEW failure %d", ret);
+      }
+
+      buf->gem_handle = req.handle;
+   }
+
+   {
+      struct drm_msm_gem_info req = {
+         .handle = buf->gem_handle,
+         .info = MSM_INFO_SET_IOVA,
+         .value = buf->iova,
+      };
+
+      int ret =
+         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
+
+      if (ret) {
+         err(1, "MSM_INFO_SET_IOVA failure %d", ret);
+      }
+   }
+
+   {
+      struct drm_msm_gem_info req = {
+         .handle = buf->gem_handle,
+         .info = MSM_INFO_GET_OFFSET,
+      };
+
+      int ret =
+         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
+      if (ret) {
+         err(1, "MSM_INFO_GET_OFFSET failure %d", ret);
+      }
+
+      void *map = mmap(0, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                       dev->fd, req.value);
+      if (map == MAP_FAILED) {
+         err(1, "mmap failure");
+      }
+
+      buf->map = map;
+   }
+}
+
+void
+buffer_mem_free(struct device *dev, struct buffer *buf)
+{
+   munmap(buf->map, buf->size);
+
+   struct drm_gem_close req = {
+      .handle = buf->gem_handle,
+   };
+   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
+}
+
+static void
+upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
+              void *hostptr)
+{
+   struct buffer *buf = device_get_buffer(dev, iova);
+
+   if (!buf) {
+      buf = calloc(sizeof(struct buffer), 1);
+      buf->iova = iova;
+      buf->size = size;
+      rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
+
+      buffer_mem_alloc(dev, buf);
+   } else if (buf->size != size) {
+      buffer_mem_free(dev, buf);
+      buf->size = size;
+      buffer_mem_alloc(dev, buf);
+   }
+
+   memcpy(buf->map, hostptr, size);
+
+   buf->used = true;
+}
+
+static int
+handle_file(const char *filename)
+{
+   enum rd_sect_type type = RD_NONE;
+   void *buf = NULL;
+   struct io *io;
+   int submit = 0;
+   int sz, ret = 0;
+   bool skip = false;
+   bool need_submit = false;
+
+   printf("Reading %s...\n", filename);
+
+   if (!strcmp(filename, "-"))
+      io = io_openfd(0);
+   else
+      io = io_open(filename);
+
+   if (!io) {
+      fprintf(stderr, "could not open: %s\n", filename);
+      return -1;
+   }
+
+   struct device *dev = device_create();
+
+   struct {
+      unsigned int len;
+      uint64_t gpuaddr;
+   } gpuaddr = {0};
+
+   while (true) {
+      uint32_t arr[2];
+
+      ret = io_readn(io, arr, 8);
+      if (ret <= 0)
+         goto end;
+
+      while ((arr[0] == 0xffffffff) && (arr[1] == 0xffffffff)) {
+         ret = io_readn(io, arr, 8);
+         if (ret <= 0)
+            goto end;
+      }
+
+      type = arr[0];
+      sz = arr[1];
+
+      if (sz < 0) {
+         ret = -1;
+         goto end;
+      }
+
+      free(buf);
+
+      buf = malloc(sz + 1);
+      ((char *)buf)[sz] = '\0';
+      ret = io_readn(io, buf, sz);
+      if (ret < 0)
+         goto end;
+
+      switch (type) {
+      case RD_TEST:
+      case RD_VERT_SHADER:
+      case RD_FRAG_SHADER:
+         /* no-op */
+         break;
+      case RD_CMD:
+         skip = false;
+         if (exename) {
+            skip |= (strstr(buf, exename) != buf);
+         } else {
+            skip |= (strstr(buf, "fdperf") == buf);
+            skip |= (strstr(buf, "chrome") == buf);
+            skip |= (strstr(buf, "surfaceflinger") == buf);
+            skip |= ((char *)buf)[0] == 'X';
+         }
+         break;
+
+      case RD_GPUADDR:
+         if (need_submit) {
+            need_submit = false;
+            device_submit_cmdstreams(dev);
+         }
+
+         parse_addr(buf, sz, &gpuaddr.len, &gpuaddr.gpuaddr);
+         /* no-op */
+         break;
+      case RD_BUFFER_CONTENTS:
+         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, buf);
+         buf = NULL;
+         break;
+      case RD_CMDSTREAM_ADDR: {
+         unsigned int sizedwords;
+         uint64_t gpuaddr;
+         parse_addr(buf, sz, &sizedwords, &gpuaddr);
+         printf("cmdstream %d: %d dwords\n", submit, sizedwords);
+
+         if (!skip) {
+            struct cmdstream *cmd = u_vector_add(&dev->cmdstreams);
+            cmd->iova = gpuaddr;
+            cmd->size = sizedwords * sizeof(uint32_t);
+            need_submit = true;
+         }
+
+         submit++;
+         break;
+      }
+      case RD_GPU_ID: {
+         uint32_t gpu_id = *((unsigned int *)buf);
+         if (!gpu_id)
+            break;
+         printf("gpuid: %d\n", gpu_id);
+         break;
+      }
+      case RD_CHIP_ID: {
+         uint64_t chip_id = *((uint64_t *)buf);
+         uint32_t gpu_id = 100 * ((chip_id >> 24) & 0xff) +
+                           10 * ((chip_id >> 16) & 0xff) +
+                           ((chip_id >> 8) & 0xff);
+         printf("gpuid: %d\n", gpu_id);
+         break;
+      }
+      default:
+         break;
+      }
+   }
+
+end:
+   if (need_submit)
+      device_submit_cmdstreams(dev);
+
+   close(dev->fd);
+
+   io_close(io);
+   fflush(stdout);
+
+   if (ret < 0) {
+      printf("corrupt file\n");
+   }
+   return 0;
+}