From 047c0ba44b24cbc64630390819d075ad72dc96e2 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 4 Aug 2023 11:16:14 +0300 Subject: [PATCH] intel/decoder: implement accumulated prints MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Useful when you want to compare 2 batches with different ordering in instruction emission. Also when the driver tries to avoid re-emitting state. Signed-off-by: Lionel Landwerlin Reviewed-by: Tapani Pälli Part-of: --- src/intel/common/intel_batch_decoder.c | 140 ++++++++++++++++++++++++++------- src/intel/common/intel_decoder.h | 23 ++++-- 2 files changed, 127 insertions(+), 36 deletions(-) diff --git a/src/intel/common/intel_batch_decoder.c b/src/intel/common/intel_batch_decoder.c index 8ec3be2..7539c55 100644 --- a/src/intel/common/intel_batch_decoder.c +++ b/src/intel/common/intel_batch_decoder.c @@ -31,11 +31,12 @@ #include static const struct debug_control debug_control[] = { - { "color", INTEL_BATCH_DECODE_IN_COLOR }, - { "full", INTEL_BATCH_DECODE_FULL }, - { "offsets", INTEL_BATCH_DECODE_OFFSETS }, - { "floats", INTEL_BATCH_DECODE_FLOATS }, - { "surfaces", INTEL_BATCH_DECODE_SURFACES }, + { "color", INTEL_BATCH_DECODE_IN_COLOR }, + { "full", INTEL_BATCH_DECODE_FULL }, + { "offsets", INTEL_BATCH_DECODE_OFFSETS }, + { "floats", INTEL_BATCH_DECODE_FLOATS }, + { "surfaces", INTEL_BATCH_DECODE_SURFACES }, + { "accumulate", INTEL_BATCH_DECODE_ACCUMULATE }, { NULL, 0 } }; @@ -1479,6 +1480,82 @@ struct custom_decoder { { "CONSTANT_BUFFER", decode_gfx4_constant_buffer }, }; +static void +get_inst_color(const struct intel_batch_decode_ctx *ctx, + const struct intel_group *inst, + char **const out_color, + char **const out_reset_color) +{ + const char *inst_name = intel_group_get_name(inst); + if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) { + *out_reset_color = NORMAL; + if (ctx->flags & INTEL_BATCH_DECODE_FULL) { + if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 || + strcmp(inst_name, 
"MI_BATCH_BUFFER_END") == 0) + *out_color = GREEN_HEADER; + else + *out_color = BLUE_HEADER; + } else { + *out_color = NORMAL; + } + } else { + *out_color = ""; + *out_reset_color = ""; + } +} + +struct inst_ptr { + struct intel_group *inst; + uint32_t *ptr; +}; + +static int +compare_inst_ptr(const void *v1, const void *v2) +{ + const struct inst_ptr *i1 = v1, *i2 = v2; + return strcmp(i1->inst->name, i2->inst->name); +} + +static void +intel_print_accumulated_instrs(struct intel_batch_decode_ctx *ctx) +{ + struct util_dynarray arr; + util_dynarray_init(&arr, NULL); + + hash_table_foreach(ctx->commands, entry) { + struct inst_ptr inst = { + .inst = (struct intel_group *)entry->key, + .ptr = entry->data, + }; + util_dynarray_append(&arr, struct inst_ptr, inst); + } + qsort(util_dynarray_begin(&arr), + util_dynarray_num_elements(&arr, struct inst_ptr), + sizeof(struct inst_ptr), + compare_inst_ptr); + + fprintf(ctx->fp, "----\n"); + util_dynarray_foreach(&arr, struct inst_ptr, i) { + char *begin_color; + char *end_color; + get_inst_color(ctx, i->inst, &begin_color, &end_color); + + uint64_t offset = 0; + fprintf(ctx->fp, "%s0x%08"PRIx64": 0x%08x: %-80s%s\n", + begin_color, offset, i->ptr[0], i->inst->name, end_color); + if (ctx->flags & INTEL_BATCH_DECODE_FULL) { + ctx_print_group(ctx, i->inst, 0, i->ptr); + for (int d = 0; d < ARRAY_SIZE(custom_decoders); d++) { + if (strcmp(i->inst->name, custom_decoders[d].cmd_name) == 0) { + custom_decoders[d].decode(ctx, i->ptr); + break; + } + } + } + } + util_dynarray_fini(&arr); +} + void intel_print_batch(struct intel_batch_decode_ctx *ctx, const uint32_t *batch, uint32_t batch_size, @@ -1525,40 +1602,43 @@ intel_print_batch(struct intel_batch_decode_ctx *ctx, continue; } - const char *color; - const char *inst_name = intel_group_get_name(inst); - if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) { - reset_color = NORMAL; - if (ctx->flags & INTEL_BATCH_DECODE_FULL) { - if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 || - 
strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) - color = GREEN_HEADER; - else - color = BLUE_HEADER; + if (ctx->flags & INTEL_BATCH_DECODE_ACCUMULATE) { + struct hash_entry *entry = _mesa_hash_table_search(ctx->commands, inst); + if (entry != NULL) { + entry->data = (void *)p; } else { - color = NORMAL; + _mesa_hash_table_insert(ctx->commands, inst, (void *)p); + } + + if (!strcmp(inst->name, "3DPRIMITIVE") || + !strcmp(inst->name, "GPGPU_WALKER") || + !strcmp(inst->name, "3DSTATE_WM_HZ_OP") || + !strcmp(inst->name, "COMPUTE_WALKER")) { + intel_print_accumulated_instrs(ctx); } } else { - color = ""; - reset_color = ""; - } + char *begin_color; + char *end_color; + get_inst_color(ctx, inst, &begin_color, &end_color); - fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n", color, offset, - ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0], - inst_name, reset_color); + fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n", + begin_color, offset, + ctx->acthd && offset == ctx->acthd ? 
" (ACTHD)" : "", p[0], + inst->name, end_color); - if (ctx->flags & INTEL_BATCH_DECODE_FULL) { - ctx_print_group(ctx, inst, offset, p); + if (ctx->flags & INTEL_BATCH_DECODE_FULL) { + ctx_print_group(ctx, inst, offset, p); - for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) { - if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) { - custom_decoders[i].decode(ctx, p); - break; + for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) { + if (strcmp(inst->name, custom_decoders[i].cmd_name) == 0) { + custom_decoders[i].decode(ctx, p); + break; + } } } } - if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) { + if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) { uint64_t next_batch_addr = 0; bool ppgtt = false; bool second_level = false; @@ -1603,7 +1683,7 @@ intel_print_batch(struct intel_batch_decode_ctx *ctx, break; } } - } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) { + } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) { break; } } diff --git a/src/intel/common/intel_decoder.h b/src/intel/common/intel_decoder.h index 9025c8a..eb821a6 100644 --- a/src/intel/common/intel_decoder.h +++ b/src/intel/common/intel_decoder.h @@ -209,17 +209,28 @@ void intel_print_group(FILE *out, enum intel_batch_decode_flags { /** Print in color! 
*/ - INTEL_BATCH_DECODE_IN_COLOR = (1 << 0), + INTEL_BATCH_DECODE_IN_COLOR = (1 << 0), /** Print everything, not just headers */ - INTEL_BATCH_DECODE_FULL = (1 << 1), + INTEL_BATCH_DECODE_FULL = (1 << 1), /** Print offsets along with the batch */ - INTEL_BATCH_DECODE_OFFSETS = (1 << 2), + INTEL_BATCH_DECODE_OFFSETS = (1 << 2), /** Guess when a value is a float and print it as such */ - INTEL_BATCH_DECODE_FLOATS = (1 << 3), + INTEL_BATCH_DECODE_FLOATS = (1 << 3), /** Print surface states */ - INTEL_BATCH_DECODE_SURFACES = (1 << 4), + INTEL_BATCH_DECODE_SURFACES = (1 << 4), /** Print sampler states */ - INTEL_BATCH_DECODE_SAMPLERS = (1 << 5), + INTEL_BATCH_DECODE_SAMPLERS = (1 << 5), + /** Print accumulated state + * + * Instead of printing instructions as we parse them, retain a pointer to + * the most recent occurrence of each instruction and print them all upon + * parsing one of the following instructions: + * - 3DPRIMITIVE + * - GPGPU_WALKER + * - 3DSTATE_WM_HZ_OP + * - COMPUTE_WALKER + */ + INTEL_BATCH_DECODE_ACCUMULATE = (1 << 6), }; #define INTEL_BATCH_DECODE_DEFAULT_FLAGS \ -- 2.7.4