From 0565c993f9eff9e91ac7d9aea53d4865c44795b8 Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev
Date: Fri, 16 Jul 2021 15:01:20 +0300
Subject: [PATCH] u_trace: helpers for tracing tiling GPUs and re-usable VK
 cmdbuffers

A re-usable command buffer can be resubmitted any number of times, but
its tracepoints are written only once. u_trace_clone_append allows
copying the tracepoints, and also copying the timestamps when the GPU
doesn't support writing timestamps to an indirect address.

The case of tiling GPUs is similar: the command stream for draws is
resubmitted for each tile.

Signed-off-by: Danylo Piliaiev
Reviewed-by: Rob Clark
Part-of:
---
 src/util/perf/u_trace.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 src/util/perf/u_trace.h |  48 ++++++++++++++++++++++
 2 files changed, 149 insertions(+)

diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c
index 4cc783e..c163e26 100644
--- a/src/util/perf/u_trace.c
+++ b/src/util/perf/u_trace.c
@@ -262,6 +262,9 @@ process_chunk(void *job, void *gdata, int thread_index)
    for (unsigned idx = 0; idx < chunk->num_traces; idx++) {
       const struct u_trace_event *evt = &chunk->traces[idx];
 
+      if (!evt->tp)
+         continue;
+
       uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx, chunk->flush_data);
       int32_t delta;
 
@@ -364,6 +367,104 @@ u_trace_fini(struct u_trace *ut)
    free_chunks(&ut->trace_chunks);
 }
 
+bool
+u_trace_has_points(struct u_trace *ut)
+{
+   return !list_is_empty(&ut->trace_chunks);
+}
+
+struct u_trace_iterator
+u_trace_begin_iterator(struct u_trace *ut)
+{
+   if (!ut->enabled)
+      return (struct u_trace_iterator) {NULL, NULL, 0};
+
+   struct u_trace_chunk *first_chunk =
+      list_first_entry(&ut->trace_chunks, struct u_trace_chunk, node);
+
+   return (struct u_trace_iterator) { ut, first_chunk, 0};
+}
+
+struct u_trace_iterator
+u_trace_end_iterator(struct u_trace *ut)
+{
+   if (!ut->enabled)
+      return (struct u_trace_iterator) {NULL, NULL, 0};
+
+   struct u_trace_chunk *last_chunk =
+      list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node);
+
+   return (struct u_trace_iterator) { ut, last_chunk, last_chunk->num_traces};
+}
+
+bool
+u_trace_iterator_equal(struct u_trace_iterator a,
+                       struct u_trace_iterator b)
+{
+   return a.ut == b.ut &&
+          a.chunk == b.chunk &&
+          a.event_idx == b.event_idx;
+}
+
+void
+u_trace_clone_append(struct u_trace_iterator begin_it,
+                     struct u_trace_iterator end_it,
+                     struct u_trace *into,
+                     void *cmdstream,
+                     u_trace_copy_ts_buffer copy_ts_buffer)
+{
+   struct u_trace_chunk *from_chunk = begin_it.chunk;
+   uint32_t from_idx = begin_it.event_idx;
+
+   while (from_chunk != end_it.chunk || from_idx != end_it.event_idx) {
+      struct u_trace_chunk *to_chunk = get_chunk(into);
+
+      unsigned to_copy = MIN2(TRACES_PER_CHUNK - to_chunk->num_traces,
+                              from_chunk->num_traces - from_idx);
+      if (from_chunk == end_it.chunk)
+         to_copy = MIN2(to_copy, end_it.event_idx - from_idx);
+
+      copy_ts_buffer(begin_it.ut->utctx, cmdstream,
+                     from_chunk->timestamps, from_idx,
+                     to_chunk->timestamps, to_chunk->num_traces,
+                     to_copy);
+
+      memcpy(&to_chunk->traces[to_chunk->num_traces],
+             &from_chunk->traces[from_idx],
+             to_copy * sizeof(struct u_trace_event));
+
+      to_chunk->num_traces += to_copy;
+      from_idx += to_copy;
+
+      assert(from_idx <= from_chunk->num_traces);
+      if (from_idx == from_chunk->num_traces) {
+         if (from_chunk == end_it.chunk)
+            break;
+
+         from_idx = 0;
+         from_chunk = LIST_ENTRY(struct u_trace_chunk, from_chunk->node.next, node);
+      }
+   }
+}
+
+void
+u_trace_disable_event_range(struct u_trace_iterator begin_it,
+                            struct u_trace_iterator end_it)
+{
+   struct u_trace_chunk *current_chunk = begin_it.chunk;
+   uint32_t start_idx = begin_it.event_idx;
+
+   while (current_chunk != end_it.chunk) {
+      memset(&current_chunk->traces[start_idx], 0,
+             (current_chunk->num_traces - start_idx) * sizeof(struct u_trace_event));
+      start_idx = 0;
+      current_chunk = LIST_ENTRY(struct u_trace_chunk, current_chunk->node.next, node);
+   }
+
+   memset(&current_chunk->traces[start_idx], 0,
+          (end_it.event_idx - start_idx) * sizeof(struct u_trace_event));
+}
+
 /**
  * Append a trace event, returning pointer to buffer of tp->payload_sz
  * to be filled in with trace payload.  Called by generated tracepoint
diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h
index f67a359..177b2ed 100644
--- a/src/util/perf/u_trace.h
+++ b/src/util/perf/u_trace.h
@@ -204,6 +204,54 @@ void u_trace_context_process(struct u_trace_context *utctx, bool eof);
 void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx);
 void u_trace_fini(struct u_trace *ut);
 
+bool u_trace_has_points(struct u_trace *ut);
+
+struct u_trace_iterator
+{
+   struct u_trace *ut;
+   struct u_trace_chunk *chunk;
+   uint32_t event_idx;
+};
+
+struct u_trace_iterator
+u_trace_begin_iterator(struct u_trace *ut);
+
+struct u_trace_iterator
+u_trace_end_iterator(struct u_trace *ut);
+
+bool
+u_trace_iterator_equal(struct u_trace_iterator a,
+                       struct u_trace_iterator b);
+
+typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
+                                       void *cmdstream,
+                                       void *ts_from, uint32_t from_offset,
+                                       void *ts_to, uint32_t to_offset,
+                                       uint32_t count);
+
+/**
+ * Clones a range of tracepoints into the target u_trace, using the
+ * given callback to have the driver copy timestamps on the GPU from
+ * one buffer to another.
+ *
+ * If tracepoints are copied between different u_trace instances, the
+ * payload is shared and remains owned by the original u_trace!
+ *
+ * This enables:
+ * - Tracing a re-usable Vulkan command buffer, by copying its
+ *   tracepoints each time it is submitted.
+ * - Per-tile tracing on tiling GPUs, by copying the range of
+ *   tracepoints corresponding to a tile.
+ */
+void u_trace_clone_append(struct u_trace_iterator begin_it,
+                          struct u_trace_iterator end_it,
+                          struct u_trace *into,
+                          void *cmdstream,
+                          u_trace_copy_ts_buffer copy_ts_buffer);
+
+void u_trace_disable_event_range(struct u_trace_iterator begin_it,
+                                 struct u_trace_iterator end_it);
+
 /**
  * Flush traces to the parent trace-context.  At this point, the expectation
  * is that all the tracepoints are "executed" by the GPU following any previously
-- 
2.7.4
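
For context, below is a minimal sketch of how a driver might wire up
these helpers when resubmitting a re-usable command buffer. All my_*
names are hypothetical placeholders, not part of this patch or of any
real driver; only the u_trace entry points (u_trace_has_points, the
iterators, u_trace_clone_append, and the u_trace_copy_ts_buffer
callback signature) come from the changes above.

/* Hypothetical driver-side usage of the new u_trace helpers. */

#include <stdint.h>
#include "util/perf/u_trace.h"

#define MY_TS_SIZE sizeof(uint64_t)   /* one 64-bit timestamp per event */

struct my_cmdstream;   /* placeholder for the driver's command stream */

/* Placeholder: record a GPU-side buffer-to-buffer copy into cs. */
static void
my_emit_gpu_copy(struct my_cmdstream *cs,
                 void *dst_bo, uint64_t dst_offset,
                 void *src_bo, uint64_t src_offset,
                 uint64_t size)
{
   /* driver-specific packet emission would go here */
}

/* Matches the u_trace_copy_ts_buffer typedef added in u_trace.h.
 * Needed when the GPU cannot write timestamps through an indirect
 * address: the timestamps written for the original command buffer
 * are copied on the GPU into the clone's buffer, so that each
 * (re)submission ends up with its own set of timestamps.
 */
static void
my_copy_ts_buffer(struct u_trace_context *utctx, void *cmdstream,
                  void *ts_from, uint32_t from_offset,
                  void *ts_to, uint32_t to_offset,
                  uint32_t count)
{
   struct my_cmdstream *cs = cmdstream;

   my_emit_gpu_copy(cs,
                    ts_to,   (uint64_t)to_offset   * MY_TS_SIZE,
                    ts_from, (uint64_t)from_offset * MY_TS_SIZE,
                    (uint64_t)count * MY_TS_SIZE);
}

/* On every submit of a re-usable command buffer, clone its entire
 * tracepoint range into a per-submission u_trace that is flushed
 * once this submission completes.
 */
static void
my_submit_cmdbuf(struct u_trace *cmdbuf_trace,
                 struct u_trace *submit_trace,
                 struct my_cmdstream *cs)
{
   if (!u_trace_has_points(cmdbuf_trace))
      return;

   u_trace_clone_append(u_trace_begin_iterator(cmdbuf_trace),
                        u_trace_end_iterator(cmdbuf_trace),
                        submit_trace, cs, my_copy_ts_buffer);
}

The per-tile case on a tiling GPU would presumably follow the same
pattern, calling u_trace_clone_append once per tile over the iterator
range belonging to that tile. u_trace_disable_event_range zeroes the
events in a range, and the process_chunk change above skips events
with a NULL tp, so cloned-away events can be suppressed in the
original trace.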