From 73d0286eb56931b014c127375e3da14be916eea1 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Wed, 7 Jun 2023 16:53:28 +0200 Subject: [PATCH] freedreno/replay: Add "gpu_print" function for command streams Allows dumping GPU memory, e.g. gpu_print(&ctx, cs, 0x4000086080, 4); would print 4 dwords from 0x4000086080: CP Log [0]: 20002000 47340000 20012001 47340001 Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/decode/rdcompiler-utils.h | 86 ++++++++++++++++++++++++++++----- src/freedreno/decode/redump.h | 1 + src/freedreno/decode/replay.c | 54 +++++++++++++++++++++ 3 files changed, 129 insertions(+), 12 deletions(-) diff --git a/src/freedreno/decode/rdcompiler-utils.h b/src/freedreno/decode/rdcompiler-utils.h index 4d583d1..d52d734 100644 --- a/src/freedreno/decode/rdcompiler-utils.h +++ b/src/freedreno/decode/rdcompiler-utils.h @@ -55,6 +55,7 @@ struct replay_context { struct cmdstream *shader_cs; struct cmdstream *shader_log; + struct cmdstream *cp_log; struct list_head cs_list; @@ -126,18 +127,24 @@ cs_alloc(struct replay_context *ctx, uint32_t size) } static void -rd_write_cs_buffer(FILE *out, struct cmdstream *cs) +rd_write_gpu_addr_section(FILE *out, struct cmdstream *cs, enum rd_sect_type section) { - if (cs->cur == 0) - return; - const uint32_t packet[] = {(uint32_t)cs->iova, (uint32_t)(cs->cur * sizeof(uint32_t)), (uint32_t)(cs->iova >> 32)}; - struct rd_section section_address = {.type = RD_GPUADDR, + struct rd_section section_address = {.type = section, .size = sizeof(packet)}; fwrite(&section_address, sizeof(section_address), 1, out); fwrite(packet, sizeof(packet), 1, out); +} + +static void +rd_write_cs_buffer(FILE *out, struct cmdstream *cs) +{ + if (cs->cur == 0) + return; + + rd_write_gpu_addr_section(out, cs, RD_GPUADDR); struct rd_section section_contents = {.type = RD_BUFFER_CONTENTS, .size = uint32_t(cs->cur * sizeof(uint32_t))}; @@ -230,6 +237,11 @@ replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id, 
ctx->shader_log->mem[1] = ctx->shader_log->iova >> 32; ctx->shader_log->cur = ctx->shader_log->total_size; + ctx->cp_log = cs_alloc(ctx, 8 * 1024 * 1024); + ((uint64_t *)ctx->cp_log->mem)[0] = ctx->cp_log->iova + 2 * sizeof(uint64_t); + ((uint64_t *)ctx->cp_log->mem)[1] = sizeof(uint64_t); + ctx->cp_log->cur = ctx->cp_log->total_size; + struct ir3_compiler_options options{}; ctx->compiler = ir3_compiler_create(NULL, dev_id, &options); @@ -250,13 +262,8 @@ replay_context_finish(struct replay_context *ctx) fwrite(&section_gpu_id, sizeof(section_gpu_id), 1, out); fwrite(&gpu_id, sizeof(uint32_t), 1, out); - const uint32_t packet[] = {(uint32_t)ctx->shader_log->iova, - (uint32_t)(ctx->shader_log->total_size), - (uint32_t)(ctx->shader_log->iova >> 32)}; - struct rd_section section_shader_log = {.type = RD_SHADER_LOG_BUFFER, - .size = sizeof(packet)}; - fwrite(&section_shader_log, sizeof(section_shader_log), 1, out); - fwrite(packet, sizeof(packet), 1, out); + rd_write_gpu_addr_section(out, ctx->shader_log, RD_SHADER_LOG_BUFFER); + rd_write_gpu_addr_section(out, ctx->cp_log, RD_CP_LOG_BUFFER); list_for_each_entry (struct cmdstream, cs, &ctx->cs_list, link) { rd_write_cs_buffer(out, cs); @@ -317,3 +324,58 @@ emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id) pkt7(prev_cs, CP_INDIRECT_BUFFER, 3); \ pkt_qw(prev_cs, cs->iova); \ pkt(prev_cs, ibcs_size); + +static void +gpu_print(struct replay_context *ctx, struct cmdstream *_cs, uint64_t iova, + uint32_t dwords) +{ + uint64_t header_iova, body_iova; + struct cmdstream *prev_cs = _cs; + struct cmdstream *cs = cs_alloc(ctx, 4096); + /* Commands that are being modified should be in a separate cmdstream, + * otherwise they would be prefetched and writes would not be visible. 
+ */ + { + /* Write size into entry's header */ + pkt7(cs, CP_MEM_WRITE, 4); + header_iova = cs_get_cur_iova(cs); + pkt_qw(cs, 0xdeadbeef); + uint64_t size_iova = cs_get_cur_iova(cs); + pkt(cs, dwords * 4); + pkt(cs, 0); + + /* Copy the data into entry's body */ + pkt7(cs, CP_MEMCPY, 5); + pkt(cs, dwords); + pkt_qw(cs, iova); + body_iova = cs_get_cur_iova(cs); + pkt_qw(cs, 0xdeadbeef); + + /* iova = iova + body_size + header_size */ + pkt7(cs, CP_MEM_TO_MEM, 9); + pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES); + pkt_qw(cs, ctx->cp_log->iova); + pkt_qw(cs, ctx->cp_log->iova); + pkt_qw(cs, size_iova); + pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t)); + } + + { + struct cmdstream *cs = prev_cs; + pkt7(cs, CP_MEM_TO_MEM, 5); + pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES); + pkt_qw(cs, header_iova); + pkt_qw(cs, ctx->cp_log->iova); + + pkt7(cs, CP_MEM_TO_MEM, 7); + pkt(cs, CP_MEM_TO_MEM_0_DOUBLE); + pkt_qw(cs, body_iova); + pkt_qw(cs, ctx->cp_log->iova); + pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t)); + + pkt7(cs, CP_WAIT_MEM_WRITES, 0); + pkt7(cs, CP_WAIT_FOR_ME, 0); + } + + end_ib(); +} \ No newline at end of file diff --git a/src/freedreno/decode/redump.h b/src/freedreno/decode/redump.h index 47ca201..1325311 100644 --- a/src/freedreno/decode/redump.h +++ b/src/freedreno/decode/redump.h @@ -43,6 +43,7 @@ enum rd_sect_type { RD_GPU_ID, RD_CHIP_ID, RD_SHADER_LOG_BUFFER, /* Specifies buffer which has logs from shaders */ + RD_CP_LOG_BUFFER, /* Specifies buffer which has logs from CP */ }; /* RD_PARAM types: */ diff --git a/src/freedreno/decode/replay.c b/src/freedreno/decode/replay.c index 6c8afbd..0e05847 100644 --- a/src/freedreno/decode/replay.c +++ b/src/freedreno/decode/replay.c @@ -178,6 +178,7 @@ struct device { struct u_vector cmdstreams; uint64_t shader_log_iova; + uint64_t cp_log_iova; bool has_set_iova; @@ -277,6 +278,52 @@ device_print_shader_log(struct device *dev) } } +static void 
+device_print_cp_log(struct device *dev) +{ + struct cp_log { + uint64_t cur_iova; + uint64_t tmp; + uint64_t first_entry_size; + }; + + struct cp_log_entry { + uint64_t size; + uint32_t data[0]; + }; + + if (dev->cp_log_iova == 0) + return; + + struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova); + if (!buf) + return; + + struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova); + if (log->first_entry_size == 0) + return; + + struct cp_log_entry *log_entry = + buf->map + offsetof(struct cp_log, first_entry_size); + uint32_t idx = 0; + while (log_entry->size != 0) { + printf("\nCP Log [%u]:\n", idx++); + uint32_t dwords = log_entry->size / 4; + + for (uint32_t i = 0; i < dwords; i++) { + if (i % 8 == 0) + printf("\t"); + printf("%08x ", log_entry->data[i]); + if (i % 8 == 7) + printf("\n"); + } + printf("\n"); + + log_entry = (void *)log_entry + log_entry->size + + offsetof(struct cp_log_entry, data); + } +} + #if !FD_REPLAY_KGSL static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) @@ -472,6 +519,7 @@ device_submit_cmdstreams(struct device *dev) u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream)); device_print_shader_log(dev); + device_print_cp_log(dev); } static void @@ -664,6 +712,7 @@ device_submit_cmdstreams(struct device *dev) u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream)); device_print_shader_log(dev); + device_print_cp_log(dev); } static void @@ -779,6 +828,11 @@ override_cmdstream(struct device *dev, struct cmdstream *cs, parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova); break; } + case RD_CP_LOG_BUFFER: { + unsigned int sizedwords; + parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova); + break; + } default: break; } -- 2.7.4