tu/perfetto: Use tu_CmdBeginDebugUtilsLabelEXT as a stage event in perfetto.
author: Emma Anholt <emma@anholt.net>
Wed, 8 Mar 2023 20:30:24 +0000 (12:30 -0800)
committer: Marge Bot <emma+marge@anholt.net>
Sun, 16 Apr 2023 15:50:49 +0000 (15:50 +0000)
This lets zink mark points of interest (particularly its barriers and
blits) with some useful data, for presenting in perfetto traces.

Closes: #8487
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22278>

src/freedreno/vulkan/tu_device.cc
src/freedreno/vulkan/tu_perfetto.cc
src/freedreno/vulkan/tu_perfetto.h
src/freedreno/vulkan/tu_tracepoints.py
src/util/perf/u_perfetto_renderpass.h

index 4d503e5..7497a18 100644 (file)
@@ -3459,3 +3459,55 @@ tu_debug_bos_print_stats(struct tu_device *dev)
 
    mtx_unlock(&dev->bo_mutex);
 }
+
+void
+tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
+                              const VkDebugUtilsLabelEXT *pLabelInfo)
+{
+   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
+
+   vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
+
+   /* Besides handing the label to the common helper above, emit a start
+    * tracepoint carrying the label text so that it can show up in perfetto.
+    * Inside a render pass the "_rp" variant is used and is given draw_cs;
+    * otherwise the plain variant is used and is given cs.
+    *
+    * Note that the spec says:
+    *
+    * "An application may open a debug label region in one command buffer and
+    *  close it in another, or otherwise split debug label regions across
+    *  multiple command buffers or multiple queue submissions. When viewed
+    *  from the linear series of submissions to a single queue, the calls to
+    *  vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
+    *  matched and balanced."
+    *
+    * But if you're beginning labeling during a renderpass and ending outside
+    * it, or vice versa, these trace ranges in perfetto will be unbalanced.  I
+    * expect that u_trace and perfetto will do something like take just one of
+    * the begins/ends, or drop the event entirely, but not crash.  Similarly,
+    * I think we'll have problems if the tracepoints are split across cmd
+    * buffers. Still, getting the simple case of cmd buffer annotation into
+    * perfetto should prove useful.
+    *
+    * The length passed below is strlen(label), i.e. it does not count the
+    * terminating NUL; the tracepoint definition in tu_tracepoints.py sizes
+    * its string copy as len + 1 to make room for it.
+    */
+   const char *label = pLabelInfo->pLabelName;
+   if (cmd_buffer->state.pass) {
+      trace_start_cmd_buffer_annotation_rp(
+         &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
+   } else {
+      trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
+                                        strlen(label), label);
+   }
+}
+
+void
+tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
+{
+   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
+   /* Close the annotation range opened by tu_CmdBeginDebugUtilsLabelEXT(),
+    * picking the render pass ("_rp") or plain variant from the current
+    * state.  The end tracepoint is skipped when vk.labels is empty
+    * (presumably meaning no label region is open in this command buffer --
+    * confirm against the common runtime), and it is emitted before the
+    * common helper below runs.
+    */
+   if (cmd_buffer->vk.labels.size > 0) {
+      if (cmd_buffer->state.pass) {
+         trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
+                                            &cmd_buffer->draw_cs);
+      } else {
+         trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
+      }
+   }
+
+   vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
+}
index d54e39a..40e2f0e 100644 (file)
@@ -40,7 +40,9 @@ enum {
  */
 enum tu_stage_id {
    CMD_BUFFER_STAGE_ID,
+   CMD_BUFFER_ANNOTATION_STAGE_ID,
    RENDER_PASS_STAGE_ID,
+   CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
    BINNING_STAGE_ID,
    GMEM_STAGE_ID,
    BYPASS_STAGE_ID,
@@ -66,7 +68,9 @@ static const struct {
    const char *desc;
 } stages[] = {
    [CMD_BUFFER_STAGE_ID]     = { "Command Buffer" },
+   [CMD_BUFFER_ANNOTATION_STAGE_ID]     = { "Annotation", "Command Buffer Annotation" },
    [RENDER_PASS_STAGE_ID]    = { "Render Pass" },
+   [CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID]    = { "Annotation", "Render Pass Command Buffer Annotation" },
    [BINNING_STAGE_ID]        = { "Binning", "Perform Visibility pass and determine target bins" },
    [GMEM_STAGE_ID]           = { "GMEM", "Rendering to GMEM" },
    [BYPASS_STAGE_ID]         = { "Bypass", "Rendering to system memory" },
@@ -134,6 +138,9 @@ send_descriptors(TuRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
 
    auto packet = ctx.NewTracePacket();
 
+   /* This must be set before interned data is sent. */
+   packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
+
    packet->set_timestamp(0);
 
    auto event = packet->set_gpu_render_stage_event();
@@ -192,6 +199,7 @@ static void
 stage_start(struct tu_device *dev,
             uint64_t ts_ns,
             enum tu_stage_id stage_id,
+            const char *app_event,
             const void *payload = nullptr,
             size_t payload_size = 0,
             trace_payload_as_extra_func payload_as_extra = nullptr)
@@ -214,10 +222,18 @@ stage_start(struct tu_device *dev,
 
    *stage = (struct tu_perfetto_stage) {
       .stage_id = stage_id,
+      .stage_iid = 0,
       .start_ts = ts_ns,
       .payload = payload,
       .start_payload_function = (void *) payload_as_extra,
    };
+
+   if (app_event) {
+      TuRenderpassDataSource::Trace([=](auto tctx) {
+         stage->stage_iid =
+            tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event);
+      });
+   }
 }
 
 static void
@@ -265,8 +281,11 @@ stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
       event->set_event_id(0); // ???
       event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
       event->set_duration(ts_ns - stage->start_ts);
-      event->set_stage_id(stage->stage_id);
-      event->set_context((uintptr_t)dev);
+      if (stage->stage_iid)
+         event->set_stage_iid(stage->stage_iid);
+      else
+         event->set_stage_id(stage->stage_id);
+      event->set_context((uintptr_t) dev);
       event->set_submission_id(submission_id);
 
       if (stage->payload) {
@@ -385,6 +404,13 @@ tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id)
 /*
  * Trace callbacks, called from u_trace once the timestamps from GPU have been
  * collected.
+ *
+ * The default "extra" funcs are code-generated into tu_tracepoints_perfetto.h
+ * and just take the tracepoint's args and add them as name/value pairs in the
+ * perfetto events.  This file can usually just map a tu_perfetto_* to
+ * stage_start/end with a call to that codegenned "extra" func.  But you can
+ * also provide your own entrypoint and extra funcs if you want to change that
+ * mapping.
  */
 
 #define CREATE_EVENT_CALLBACK(event_name, stage_id)                                 \
@@ -393,7 +419,7 @@ tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id)
       const struct trace_start_##event_name *payload)                               \
    {                                                                                \
       stage_start(                                                                  \
-         dev, ts_ns, stage_id, payload, sizeof(*payload),                           \
+         dev, ts_ns, stage_id, NULL, payload, sizeof(*payload),                     \
          (trace_payload_as_extra_func) &trace_payload_as_extra_start_##event_name); \
    }                                                                                \
                                                                                     \
@@ -420,6 +446,58 @@ CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
 CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
 CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)
 
+void
+tu_perfetto_start_cmd_buffer_annotation(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_start_cmd_buffer_annotation *payload)
+{
+   /* Pass the payload string as the app_event, so that the label appears
+    * right on the event block rather than as metadata inside it.  No extra
+    * func is passed (NULL), since the label is the only arg of interest.
+    */
+   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, payload,
+               sizeof(*payload), NULL);
+}
+
+void
+tu_perfetto_end_cmd_buffer_annotation(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_end_cmd_buffer_annotation *payload)
+{
+   /* The end tracepoint is recorded without arguments: the label string is
+    * carried by the start tracepoint and is turned into the app_event by the
+    * matching start callback, so there is nothing to add to the event here.
+    */
+   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data,
+             payload, NULL);
+}
+
+void
+tu_perfetto_start_cmd_buffer_annotation_rp(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_start_cmd_buffer_annotation_rp *payload)
+{
+   /* Render pass variant: pass the payload string as the app_event, so that
+    * the label appears right on the event block rather than as metadata
+    * inside it.  No extra func is passed (NULL), since the label is the only
+    * arg of interest.
+    */
+   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
+               payload->str, payload, sizeof(*payload), NULL);
+}
+
+void
+tu_perfetto_end_cmd_buffer_annotation_rp(
+   struct tu_device *dev,
+   uint64_t ts_ns,
+   const void *flush_data,
+   const struct trace_end_cmd_buffer_annotation_rp *payload)
+{
+   /* Render pass variant.  The end tracepoint is recorded without arguments:
+    * the label string is carried by the start tracepoint and is turned into
+    * the app_event by the matching start callback, so there is nothing to add
+    * to the event here.
+    */
+   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
+             flush_data, payload, NULL);
+}
+
 #ifdef __cplusplus
 }
 #endif
index 12f2bce..92a56c6 100644 (file)
@@ -22,6 +22,10 @@ struct tu_u_trace_submission_data;
 
 struct tu_perfetto_stage {
    int stage_id;
+   /* dynamically allocated stage iid, for app_events.  0 if stage_id should be
+    * used instead.
+    */
+   uint64_t stage_iid;
    uint64_t start_ts;
    const void* payload;
    void* start_payload_function;
index e3d5da5..5af5ce6 100644 (file)
@@ -123,6 +123,15 @@ begin_end_tp('compute',
           Arg(type='uint16_t', var='num_groups_y',   c_format='%u'),
           Arg(type='uint16_t', var='num_groups_z',   c_format='%u')])
 
+
+# Annotations for Cmd(Begin|End)DebugUtilsLabelEXT ("" is used outside a render pass, "_rp" inside one)
+for suffix in ["", "_rp"]:
+    begin_end_tp('cmd_buffer_annotation' + suffix,
+                    args=[ArgStruct(type='unsigned', var='len'),
+                          ArgStruct(type='const char *', var='str'),],
+                    tp_struct=[Arg(type='uint8_t', name='dummy', var='0', c_format='%hhu'),
+                               Arg(type='char', name='str', var='str', c_format='%s', length_arg='len + 1', copy_func='strncpy'),])
+
 utrace_generate(cpath=args.utrace_src,
                 hpath=args.utrace_hdr,
                 ctx_param='struct tu_device *dev',
index a128cf6..13aad34 100644 (file)
@@ -114,6 +114,9 @@ class MesaRenderpassDataSource
     * event in the UI, rather than needing to click into the event to find the
     * name in the metadata.  Intended for use with
     * vkCmdBeginDebugUtilsLabelEXT() and glPushDebugGroup().
+    *
+    * Note that SEQ_INCREMENTAL_STATE_CLEARED must have been set in the
+    * sequence before this is called.
     */
    uint64_t debug_marker_stage(TraceContext &ctx, const char *name)
    {