freedreno/replay: Add "gpu_print" function for command streams
author Danylo Piliaiev <dpiliaiev@igalia.com>
Wed, 7 Jun 2023 14:53:28 +0000 (16:53 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 13 Sep 2023 22:20:13 +0000 (22:20 +0000)
Allows dumping GPU memory from a command stream, e.g.
 gpu_print(&ctx, cs, 0x4000086080, 4);

would print 4 dwords starting at 0x4000086080:
 CP Log [0]:
         20002000 47340000 20012001 47340001

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25005>

src/freedreno/decode/rdcompiler-utils.h
src/freedreno/decode/redump.h
src/freedreno/decode/replay.c

index 4d583d1..d52d734 100644 (file)
@@ -55,6 +55,7 @@ struct replay_context {
    struct cmdstream *shader_cs;
 
    struct cmdstream *shader_log;
+   struct cmdstream *cp_log;
 
    struct list_head cs_list;
 
@@ -126,18 +127,24 @@ cs_alloc(struct replay_context *ctx, uint32_t size)
 }
 
 static void
-rd_write_cs_buffer(FILE *out, struct cmdstream *cs)
+rd_write_gpu_addr_section(FILE *out, struct cmdstream *cs, enum rd_sect_type section)
 {
-   if (cs->cur == 0)
-      return;
-
    const uint32_t packet[] = {(uint32_t)cs->iova,
                               (uint32_t)(cs->cur * sizeof(uint32_t)),
                               (uint32_t)(cs->iova >> 32)};
-   struct rd_section section_address = {.type = RD_GPUADDR,
+   struct rd_section section_address = {.type = section,
                                         .size = sizeof(packet)};
    fwrite(&section_address, sizeof(section_address), 1, out);
    fwrite(packet, sizeof(packet), 1, out);
+}
+
+static void
+rd_write_cs_buffer(FILE *out, struct cmdstream *cs)
+{
+   if (cs->cur == 0)
+      return;
+
+   rd_write_gpu_addr_section(out, cs, RD_GPUADDR);
 
    struct rd_section section_contents = {.type = RD_BUFFER_CONTENTS,
                                          .size = uint32_t(cs->cur * sizeof(uint32_t))};
@@ -230,6 +237,11 @@ replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id,
    ctx->shader_log->mem[1] = ctx->shader_log->iova >> 32;
    ctx->shader_log->cur = ctx->shader_log->total_size;
 
+   ctx->cp_log = cs_alloc(ctx, 8 * 1024 * 1024);
+   ((uint64_t *)ctx->cp_log->mem)[0] = ctx->cp_log->iova + 2 * sizeof(uint64_t);
+   ((uint64_t *)ctx->cp_log->mem)[1] = sizeof(uint64_t);
+   ctx->cp_log->cur = ctx->cp_log->total_size;
+
    struct ir3_compiler_options options{};
    ctx->compiler =
       ir3_compiler_create(NULL, dev_id, &options);
@@ -250,13 +262,8 @@ replay_context_finish(struct replay_context *ctx)
    fwrite(&section_gpu_id, sizeof(section_gpu_id), 1, out);
    fwrite(&gpu_id, sizeof(uint32_t), 1, out);
 
-   const uint32_t packet[] = {(uint32_t)ctx->shader_log->iova,
-                              (uint32_t)(ctx->shader_log->total_size),
-                              (uint32_t)(ctx->shader_log->iova >> 32)};
-   struct rd_section section_shader_log = {.type = RD_SHADER_LOG_BUFFER,
-                                           .size = sizeof(packet)};
-   fwrite(&section_shader_log, sizeof(section_shader_log), 1, out);
-   fwrite(packet, sizeof(packet), 1, out);
+   rd_write_gpu_addr_section(out, ctx->shader_log, RD_SHADER_LOG_BUFFER);
+   rd_write_gpu_addr_section(out, ctx->cp_log, RD_CP_LOG_BUFFER);
 
    list_for_each_entry (struct cmdstream, cs, &ctx->cs_list, link) {
       rd_write_cs_buffer(out, cs);
@@ -317,3 +324,58 @@ emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id)
    pkt7(prev_cs, CP_INDIRECT_BUFFER, 3);                                       \
    pkt_qw(prev_cs, cs->iova);                                                  \
    pkt(prev_cs, ibcs_size);
+
+static void
+gpu_print(struct replay_context *ctx, struct cmdstream *_cs, uint64_t iova,
+          uint32_t dwords)
+{
+   uint64_t header_iova, body_iova;
+   struct cmdstream *prev_cs = _cs;
+   struct cmdstream *cs = cs_alloc(ctx, 4096);
+   /* Commands that are being modified should be in a separate cmdstream,
+    * otherwise they would be prefetched and writes would not be visible.
+    */
+   {
+      /* Write size into entry's header */
+      pkt7(cs, CP_MEM_WRITE, 4);
+      header_iova = cs_get_cur_iova(cs);
+      pkt_qw(cs, 0xdeadbeef);
+      uint64_t size_iova = cs_get_cur_iova(cs);
+      pkt(cs, dwords * 4);
+      pkt(cs, 0);
+
+      /* Copy the data into entry's body */
+      pkt7(cs, CP_MEMCPY, 5);
+      pkt(cs, dwords);
+      pkt_qw(cs, iova);
+      body_iova = cs_get_cur_iova(cs);
+      pkt_qw(cs, 0xdeadbeef);
+
+      /* iova = iova + body_size + header_size */
+      pkt7(cs, CP_MEM_TO_MEM, 9);
+      pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
+      pkt_qw(cs, ctx->cp_log->iova);
+      pkt_qw(cs, ctx->cp_log->iova);
+      pkt_qw(cs, size_iova);
+      pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));
+   }
+
+   {
+      struct cmdstream *cs = prev_cs;
+      pkt7(cs, CP_MEM_TO_MEM, 5);
+      pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
+      pkt_qw(cs, header_iova);
+      pkt_qw(cs, ctx->cp_log->iova);
+
+      pkt7(cs, CP_MEM_TO_MEM, 7);
+      pkt(cs, CP_MEM_TO_MEM_0_DOUBLE);
+      pkt_qw(cs, body_iova);
+      pkt_qw(cs, ctx->cp_log->iova);
+      pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));
+
+      pkt7(cs, CP_WAIT_MEM_WRITES, 0);
+      pkt7(cs, CP_WAIT_FOR_ME, 0);
+   }
+
+   end_ib();
+}
\ No newline at end of file
index 47ca201..1325311 100644 (file)
@@ -43,6 +43,7 @@ enum rd_sect_type {
    RD_GPU_ID,
    RD_CHIP_ID,
    RD_SHADER_LOG_BUFFER, /* Specifies buffer which has logs from shaders */
+   RD_CP_LOG_BUFFER, /* Specifies buffer which has logs from CP */
 };
 
 /* RD_PARAM types: */
index 6c8afbd..0e05847 100644 (file)
@@ -178,6 +178,7 @@ struct device {
    struct u_vector cmdstreams;
 
    uint64_t shader_log_iova;
+   uint64_t cp_log_iova;
 
    bool has_set_iova;
 
@@ -277,6 +278,52 @@ device_print_shader_log(struct device *dev)
    }
 }
 
+static void
+device_print_cp_log(struct device *dev)
+{
+   struct cp_log {
+      uint64_t cur_iova;
+      uint64_t tmp;
+      uint64_t first_entry_size;
+   };
+
+   struct cp_log_entry {
+      uint64_t size;
+      uint32_t data[0];
+   };
+
+   if (dev->cp_log_iova == 0)
+      return;
+
+   struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova);
+   if (!buf)
+      return;
+
+   struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova);
+   if (log->first_entry_size == 0)
+      return;
+
+   struct cp_log_entry *log_entry =
+      buf->map + offsetof(struct cp_log, first_entry_size);
+   uint32_t idx = 0;
+   while (log_entry->size != 0) {
+      printf("\nCP Log [%u]:\n", idx++);
+      uint32_t dwords = log_entry->size / 4;
+
+      for (uint32_t i = 0; i < dwords; i++) {
+         if (i % 8 == 0)
+            printf("\t");
+         printf("%08x ", log_entry->data[i]);
+         if (i % 8 == 7)
+            printf("\n");
+      }
+      printf("\n");
+
+      log_entry = (void *)log_entry + log_entry->size +
+                  offsetof(struct cp_log_entry, data);
+   }
+}
+
 #if !FD_REPLAY_KGSL
 static inline void
 get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
@@ -472,6 +519,7 @@ device_submit_cmdstreams(struct device *dev)
    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
 
    device_print_shader_log(dev);
+   device_print_cp_log(dev);
 }
 
 static void
@@ -664,6 +712,7 @@ device_submit_cmdstreams(struct device *dev)
    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
 
    device_print_shader_log(dev);
+   device_print_cp_log(dev);
 }
 
 static void
@@ -779,6 +828,11 @@ override_cmdstream(struct device *dev, struct cmdstream *cs,
          parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova);
          break;
       }
+      case RD_CP_LOG_BUFFER: {
+         unsigned int sizedwords;
+         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova);
+         break;
+      }
       default:
          break;
       }