--- /dev/null
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <perfetto.h>
+
+#include "util/u_perfetto.h"
+
+#include "freedreno_tracepoints.h"
+
+static uint32_t gpu_clock_id;
+static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */
+
+/**
+ * The timestamp at the point where we first emitted the clock_sync..
+ * this will be a *later* timestamp that the first GPU traces (since
+ * we capture the first clock_sync from the CPU *after* the first GPU
+ * tracepoints happen). To avoid confusing perfetto we need to drop
+ * the GPU traces with timestamps before this.
+ */
+static uint64_t sync_gpu_ts;
+
+struct FdRenderpassIncrementalState {
+ bool was_cleared = true;
+};
+
+struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
+ using IncrementalStateType = FdRenderpassIncrementalState;
+};
+
+class FdRenderpassDataSource : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> {
+public:
+ void OnSetup(const SetupArgs &) override
+ {
+ // Use this callback to apply any custom configuration to your data source
+ // based on the TraceConfig in SetupArgs.
+ }
+
+ void OnStart(const StartArgs &) override
+ {
+ // This notification can be used to initialize the GPU driver, enable
+ // counters, etc. StartArgs will contains the DataSourceDescriptor,
+ // which can be extended.
+ u_trace_perfetto_start();
+ PERFETTO_LOG("Tracing started");
+
+ /* Note: clock_id's below 128 are reserved.. for custom clock sources,
+ * using the hash of a namespaced string is the recommended approach.
+ * See: https://perfetto.dev/docs/concepts/clock-sync
+ */
+ gpu_clock_id =
+ _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
+ }
+
+ void OnStop(const StopArgs &) override
+ {
+ PERFETTO_LOG("Tracing stopped");
+
+ // Undo any initialization done in OnStart.
+ u_trace_perfetto_stop();
+ // TODO we should perhaps block until queued traces are flushed?
+
+ Trace([](FdRenderpassDataSource::TraceContext ctx) {
+ auto packet = ctx.NewTracePacket();
+ packet->Finalize();
+ ctx.Flush();
+ });
+ }
+};
+
+PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
+PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
+
+static void
+send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
+{
+ PERFETTO_LOG("Sending renderstage descriptors");
+
+ auto packet = ctx.NewTracePacket();
+
+ packet->set_timestamp(0);
+// packet->set_timestamp(ts_ns);
+// packet->set_timestamp_clock_id(gpu_clock_id);
+
+ auto event = packet->set_gpu_render_stage_event();
+ event->set_gpu_id(0);
+
+ auto spec = event->set_specifications();
+
+ for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
+ auto desc = spec->add_hw_queue();
+
+ desc->set_name(queues[i].name);
+ desc->set_description(queues[i].desc);
+ }
+
+ for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
+ auto desc = spec->add_stage();
+
+ desc->set_name(stages[i].name);
+ if (stages[i].desc)
+ desc->set_description(stages[i].desc);
+ }
+}
+
+static void
+stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_perfetto_state *p = &ctx->perfetto;
+
+ p->start_ts[stage] = ts_ns;
+}
+
+static void
+stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_perfetto_state *p = &ctx->perfetto;
+
+ /* If we haven't managed to calibrate the alignment between GPU and CPU
+ * timestamps yet, then skip this trace, otherwise perfetto won't know
+ * what to do with it.
+ */
+ if (!sync_gpu_ts)
+ return;
+
+ FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
+ if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
+ send_descriptors(tctx, p->start_ts[stage]);
+ state->was_cleared = false;
+ }
+
+ auto packet = tctx.NewTracePacket();
+
+ packet->set_timestamp(p->start_ts[stage]);
+ packet->set_timestamp_clock_id(gpu_clock_id);
+
+ auto event = packet->set_gpu_render_stage_event();
+ event->set_event_id(0); // ???
+ event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
+ event->set_duration(ts_ns - p->start_ts[stage]);
+ event->set_stage_id(stage);
+ event->set_context((uintptr_t)pctx);
+
+ /* The "surface" meta-stage has extra info about render target: */
+ if (stage == SURFACE_STAGE_ID) {
+
+ event->set_submission_id(p->submit_id);
+
+ if (p->cbuf0_format) {
+ auto data = event->add_extra_data();
+
+ data->set_name("color0 format");
+ data->set_value(util_format_short_name(p->cbuf0_format));
+ }
+
+ if (p->zs_format) {
+ auto data = event->add_extra_data();
+
+ data->set_name("zs format");
+ data->set_value(util_format_short_name(p->zs_format));
+ }
+
+ {
+ auto data = event->add_extra_data();
+
+ data->set_name("width");
+ data->set_value(std::to_string(p->width));
+ }
+
+ {
+ auto data = event->add_extra_data();
+
+ data->set_name("height");
+ data->set_value(std::to_string(p->height));
+ }
+
+ {
+ auto data = event->add_extra_data();
+
+ data->set_name("MSAA");
+ data->set_value(std::to_string(p->samples));
+ }
+
+ {
+ auto data = event->add_extra_data();
+
+ data->set_name("MRTs");
+ data->set_value(std::to_string(p->mrts));
+ }
+
+ // "renderMode"
+ // "surfaceID"
+
+ if (p->nbins) {
+ auto data = event->add_extra_data();
+
+ data->set_name("numberOfBins");
+ data->set_value(std::to_string(p->nbins));
+ }
+
+ if (p->binw) {
+ auto data = event->add_extra_data();
+
+ data->set_name("binWidth");
+ data->set_value(std::to_string(p->binw));
+ }
+
+ if (p->binh) {
+ auto data = event->add_extra_data();
+
+ data->set_name("binHeight");
+ data->set_value(std::to_string(p->binh));
+ }
+ }
+ });
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void
+fd_perfetto_init(void)
+{
+ util_perfetto_init();
+
+ perfetto::DataSourceDescriptor dsd;
+ dsd.set_name("gpu.renderstages.msm");
+ FdRenderpassDataSource::Register(dsd);
+}
+
+static void
+sync_timestamp(struct fd_context *ctx)
+{
+ uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
+ uint64_t gpu_ts;
+
+ if (cpu_ts < next_clock_sync_ns)
+ return;
+
+ if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
+ PERFETTO_ELOG("Could not sync CPU and GPU clocks");
+ return;
+ }
+
+ /* convert GPU ts into ns: */
+ gpu_ts = ctx->ts_to_ns(gpu_ts);
+
+ FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
+ auto packet = tctx.NewTracePacket();
+
+ packet->set_timestamp(cpu_ts);
+
+ auto event = packet->set_clock_snapshot();
+
+ {
+ auto clock = event->add_clocks();
+
+ clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
+ clock->set_timestamp(cpu_ts);
+ }
+
+ {
+ auto clock = event->add_clocks();
+
+ clock->set_clock_id(gpu_clock_id);
+ clock->set_timestamp(gpu_ts);
+ }
+
+ sync_gpu_ts = gpu_ts;
+ next_clock_sync_ns = cpu_ts + 30000000;
+ });
+}
+
+static void
+emit_submit_id(struct fd_context *ctx)
+{
+ FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
+ auto packet = tctx.NewTracePacket();
+
+ packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
+
+ auto event = packet->set_vulkan_api_event();
+ auto submit = event->set_vk_queue_submit();
+
+ submit->set_submission_id(ctx->submit_count);
+ });
+}
+
+void
+fd_perfetto_submit(struct fd_context *ctx)
+{
+ sync_timestamp(ctx);
+ emit_submit_id(ctx);
+}
+
+/*
+ * Trace callbacks, called from u_trace once the timestamps from GPU have been
+ * collected.
+ */
+
+void
+fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_start_render_pass *payload)
+{
+ stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
+
+ struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
+
+ p->submit_id = payload->submit_id;
+ p->cbuf0_format = payload->cbuf0_format;
+ p->zs_format = payload->zs_format;
+ p->width = payload->width;
+ p->height = payload->height;
+ p->mrts = payload->mrts;
+ p->samples = payload->samples;
+ p->nbins = payload->nbins;
+ p->binw = payload->binw;
+ p->binh = payload->binh;
+}
+
+void
+fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_end_render_pass *payload)
+{
+ stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
+}
+
+void
+fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_start_binning_ib *payload)
+{
+ stage_start(pctx, ts_ns, BINNING_STAGE_ID);
+}
+
+void
+fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_end_binning_ib *payload)
+{
+ stage_end(pctx, ts_ns, BINNING_STAGE_ID);
+}
+
+void
+fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_start_draw_ib *payload)
+{
+ stage_start(
+ pctx, ts_ns,
+ fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
+}
+
+void
+fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_end_draw_ib *payload)
+{
+ stage_end(
+ pctx, ts_ns,
+ fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
+}
+
+void
+fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_start_blit *payload)
+{
+ stage_start(pctx, ts_ns, BLIT_STAGE_ID);
+}
+
+void
+fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_end_blit *payload)
+{
+ stage_end(pctx, ts_ns, BLIT_STAGE_ID);
+}
+
+void
+fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_start_compute *payload)
+{
+ stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
+}
+
+void
+fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_end_compute *payload)
+{
+ stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
+}
+
+void
+fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_start_clear_restore *payload)
+{
+ stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
+}
+
+void
+fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_end_clear_restore *payload)
+{
+ stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
+}
+
+void
+fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_start_resolve *payload)
+{
+ stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);
+}
+
+void
+fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,
+ const struct trace_end_resolve *payload)
+{
+ stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);
+}
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef FREEDRENO_PERFETTO_H_
+#define FREEDRENO_PERFETTO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_PERFETTO
+
+/**
+ * Render-stage id's
+ */
+enum fd_stage_id {
+ SURFACE_STAGE_ID, /* Surface is a sort of meta-stage for render-target info */
+ BINNING_STAGE_ID,
+ GMEM_STAGE_ID,
+ BYPASS_STAGE_ID,
+ BLIT_STAGE_ID,
+ COMPUTE_STAGE_ID,
+ CLEAR_RESTORE_STAGE_ID,
+ RESOLVE_STAGE_ID,
+ // TODO add the rest
+
+ NUM_STAGES
+};
+
+static const struct {
+ const char *name;
+ const char *desc;
+} stages[] = {
+ [SURFACE_STAGE_ID] = {"Surface"},
+ [BINNING_STAGE_ID] = {"Binning", "Perform Visibility pass and determine target bins"},
+ [GMEM_STAGE_ID] = {"Render", "Rendering to GMEM"},
+ [BYPASS_STAGE_ID] = {"Render", "Rendering to system memory"},
+ [BLIT_STAGE_ID] = {"Blit", "Performing a Blit operation"},
+ [COMPUTE_STAGE_ID] = {"Compute", "Compute job"},
+ [CLEAR_RESTORE_STAGE_ID] = {"Clear/Restore", "Clear (sysmem) or per-tile clear or restore (GMEM)"},
+ [RESOLVE_STAGE_ID] = {"Resolve", "Per tile resolve (GMEM to system memory"},
+ // TODO add the rest
+};
+
+/**
+ * Queue-id's
+ */
+enum {
+ DEFAULT_HW_QUEUE_ID,
+};
+
+static const struct {
+ const char *name;
+ const char *desc;
+} queues[] = {
+ [DEFAULT_HW_QUEUE_ID] = {"GPU Queue 0", "Default Adreno Hardware Queue"},
+};
+
+/**
+ * The u_trace tracepoints which are used to capture GPU timestamps and
+ * trigger perfetto events tend to come in begin/end pairs (ie. start
+ * and end of binning pass, etc), but perfetto wants one event for the
+ * whole pass. So we need to buffer up some state at the "begin" trae
+ * callback, and then emit the perfetto event at the "end" event based
+ * on previously recorded timestamp/data. This struct is where we can
+ * accumulate that state.
+ */
+struct fd_perfetto_state {
+ uint64_t start_ts[NUM_STAGES];
+
+ /*
+ * Surface state for the renderpass:
+ */
+ uint32_t submit_id;
+ enum pipe_format cbuf0_format : 16;
+ enum pipe_format zs_format : 16;
+ uint16_t width;
+ uint16_t height;
+ uint8_t mrts;
+ uint8_t samples;
+ uint16_t nbins;
+ uint16_t binw;
+ uint16_t binh;
+ // TODO # of draws and possibly estimated cost might be useful addition..
+};
+
+void fd_perfetto_init(void);
+
+struct fd_context;
+void fd_perfetto_submit(struct fd_context *ctx);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* FREEDRENO_PERFETTO_H_ */
Tracepoint('render_sysmem')
-Tracepoint('start_binning_ib')
-Tracepoint('end_binning_ib')
+# Note that this doesn't include full information about all of the MRTs
+# but seems to roughly match what I see with a blob trace
+Tracepoint('start_render_pass',
+ args=[['uint32_t', 'submit_id'],
+ ['enum pipe_format', 'cbuf0_format'],
+ ['enum pipe_format', 'zs_format'],
+ ['uint16_t', 'width'],
+ ['uint16_t', 'height'],
+ ['uint8_t', 'mrts'],
+ ['uint8_t', 'samples'],
+ ['uint16_t', 'nbins'],
+ ['uint16_t', 'binw'],
+ ['uint16_t', 'binh']],
+ tp_perfetto='fd_start_render_pass'
+)
+Tracepoint('end_render_pass',
+ tp_perfetto='fd_end_render_pass')
+
+Tracepoint('start_binning_ib',
+ tp_perfetto='fd_start_binning_ib')
+Tracepoint('end_binning_ib',
+ tp_perfetto='fd_end_binning_ib')
Tracepoint('start_vsc_overflow_test')
Tracepoint('end_vsc_overflow_test')
# For GMEM pass, where this could either be a clear or resolve
Tracepoint('start_clear_restore',
args=[['uint16_t', 'fast_cleared']],
- tp_print=['fast_cleared: 0x%x', '__entry->fast_cleared']
+ tp_print=['fast_cleared: 0x%x', '__entry->fast_cleared'],
+ tp_perfetto='fd_start_clear_restore',
)
-Tracepoint('end_clear_restore')
+Tracepoint('end_clear_restore',
+ tp_perfetto='fd_end_clear_restore')
-Tracepoint('start_resolve')
-Tracepoint('end_resolve')
+Tracepoint('start_resolve',
+ tp_perfetto='fd_start_resolve')
+Tracepoint('end_resolve',
+ tp_perfetto='fd_end_resolve')
Tracepoint('start_tile',
args=[['uint16_t', 'bin_h'],
'__entry->bin_h', '__entry->yoff', '__entry->bin_w', '__entry->xoff'],
)
-Tracepoint('start_draw_ib')
-Tracepoint('end_draw_ib')
+Tracepoint('start_draw_ib',
+ tp_perfetto='fd_start_draw_ib')
+Tracepoint('end_draw_ib',
+ tp_perfetto='fd_end_draw_ib')
Tracepoint('start_blit',
args=[['enum pipe_texture_target', 'src_target'],
['enum pipe_texture_target', 'dst_target']],
tp_print=['%s -> %s', 'util_str_tex_target(__entry->src_target, true)',
'util_str_tex_target(__entry->dst_target, true)'],
+ tp_perfetto='fd_start_blit',
)
-Tracepoint('end_blit')
+Tracepoint('end_blit',
+ tp_perfetto='fd_end_blit')
-Tracepoint('start_compute')
-Tracepoint('end_compute')
+Tracepoint('start_compute',
+ tp_perfetto='fd_start_compute')
+Tracepoint('end_compute',
+ tp_perfetto='fd_end_compute')
utrace_generate(cpath=args.src, hpath=args.hdr)