turnip: support tracing of gmem/sysmem load/store/clears
author: Danylo Piliaiev <dpiliaiev@igalia.com>
Fri, 16 Jul 2021 15:52:31 +0000 (18:52 +0300)
committer: Danylo Piliaiev <dpiliaiev@igalia.com>
Fri, 10 Sep 2021 11:58:28 +0000 (14:58 +0300)
Now we support per-tile tracing.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Hyunjun Ko <zzoon@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10969>

src/freedreno/vulkan/tu_clear_blit.c
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_perfetto.cc
src/freedreno/vulkan/tu_perfetto.h
src/freedreno/vulkan/tu_private.h
src/freedreno/vulkan/tu_tracepoints.py

index 311077e..38d591d 100644 (file)
@@ -2042,7 +2042,7 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
 {
    const struct blit_ops *ops = &r2d_ops;
 
-   trace_start_resolve(&cmd->trace, cs);
+   trace_start_sysmem_resolve(&cmd->trace, cs);
 
    ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT,
               0, false, dst->ubwc_enabled, VK_SAMPLE_COUNT_1_BIT);
@@ -2061,7 +2061,7 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
 
    ops->teardown(cmd, cs);
 
-   trace_end_resolve(&cmd->trace, cs);
+   trace_end_sysmem_resolve(&cmd->trace, cs, format);
 }
 
 void
@@ -2198,6 +2198,8 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
    bool z_clear = false;
    bool s_clear = false;
 
+   trace_start_sysmem_clear_all(&cmd->trace, cs);
+
    for (uint32_t i = 0; i < attachment_count; i++) {
       uint32_t a;
       if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
@@ -2328,6 +2330,9 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
          r3d_run(cmd, cs);
       }
    }
+
+   trace_end_sysmem_clear_all(&cmd->trace,
+                              cs, mrt_count, rect_count);
 }
 
 static void
@@ -2445,6 +2450,8 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
    const struct tu_render_pass_attachment *att =
       &cmd->state.pass->attachments[attachment];
 
+   trace_start_gmem_clear(&cmd->trace, cs);
+
    if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
       if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
          clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value);
@@ -2454,6 +2461,8 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
    }
 
    clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value);
+
+   trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples);
 }
 
 static void
@@ -2556,6 +2565,8 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
    if (cmd->state.pass->attachments[a].samples > 1)
       ops = &r3d_ops;
 
+   trace_start_sysmem_clear(&cmd->trace, cs);
+
    ops->setup(cmd, cs, format, clear_mask, 0, true, iview->ubwc_enabled,
               cmd->state.pass->attachments[a].samples);
    ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
@@ -2574,6 +2585,10 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
    }
 
    ops->teardown(cmd, cs);
+
+   trace_end_sysmem_clear(&cmd->trace, cs,
+                          format, ops == &r3d_ops,
+                          cmd->state.pass->attachments[a].samples);
 }
 
 void
@@ -2726,11 +2741,15 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
    const struct tu_render_pass_attachment *attachment =
       &cmd->state.pass->attachments[a];
 
+   trace_start_gmem_load(&cmd->trace, cs);
+
    if (attachment->load || force_load)
       tu_emit_blit(cmd, cs, iview, attachment, false, false);
 
    if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
       tu_emit_blit(cmd, cs, iview, attachment, false, true);
+
+   trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load);
 }
 
 static void
@@ -2857,12 +2876,16 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
       src->format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
       dst->format == VK_FORMAT_S8_UINT;
 
+   trace_start_gmem_store(&cmd->trace, cs);
+
    /* use fast path when render area is aligned, except for unsupported resolve cases */
    if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
       if (dst->store)
          tu_emit_blit(cmd, cs, iview, src, true, resolve_d32s8_s8);
       if (dst->store_stencil)
          tu_emit_blit(cmd, cs, iview, src, true, true);
+
+      trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false);
       return;
    }
 
@@ -2901,4 +2924,6 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                        src->gmem_offset_stencil, src->samples);
       }
    }
+
+   trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned);
 }
index ba212f9..f6430fd 100644 (file)
@@ -1306,6 +1306,15 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    tu_cs_emit_call(cs, &cmd->tile_store_cs);
 
+   if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end)) {
+      tu_cs_emit_wfi(cs);
+      tu_cs_emit_pkt7(&cmd->cs, CP_WAIT_FOR_ME, 0);
+      u_trace_clone_append(cmd->trace_renderpass_start,
+                           cmd->trace_renderpass_end,
+                           &cmd->trace,
+                           cs, tu_copy_timestamp_buffer);
+   }
+
    tu_cs_sanity_check(cs);
 }
 
@@ -1354,6 +1363,10 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
    tu6_tile_render_end(cmd, &cmd->cs);
 
    trace_end_render_pass(&cmd->trace, &cmd->cs, fb);
+
+   if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end))
+      u_trace_disable_event_range(cmd->trace_renderpass_start,
+                                  cmd->trace_renderpass_end);
 }
 
 static void
@@ -3133,6 +3146,8 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
       cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
    }
 
+   cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
+
    tu_emit_renderpass_begin(cmd, pRenderPassBegin);
 
    tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
@@ -4580,6 +4595,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
    tu_cs_end(&cmd_buffer->tile_store_cs);
    tu_cs_end(&cmd_buffer->draw_epilogue_cs);
 
+   cmd_buffer->trace_renderpass_end = u_trace_end_iterator(&cmd_buffer->trace);
+
    if (use_sysmem_rendering(cmd_buffer))
       tu_cmd_render_sysmem(cmd_buffer);
    else
index cfd421a..656b0b9 100644 (file)
@@ -284,7 +284,12 @@ CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
 CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
 CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
 CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
-CREATE_EVENT_CALLBACK(resolve, RESOLVE_STAGE_ID)
+CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
+CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
+CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
+CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
+CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
+CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)
 
 #ifdef __cplusplus
 }
index d02a165..b6c5b1d 100644 (file)
@@ -40,8 +40,11 @@ enum tu_stage_id {
    BYPASS_STAGE_ID,
    BLIT_STAGE_ID,
    COMPUTE_STAGE_ID,
-   CLEAR_RESTORE_STAGE_ID,
-   RESOLVE_STAGE_ID,
+   CLEAR_SYSMEM_STAGE_ID,
+   CLEAR_GMEM_STAGE_ID,
+   GMEM_LOAD_STAGE_ID,
+   GMEM_STORE_STAGE_ID,
+   SYSMEM_RESOLVE_STAGE_ID,
    // TODO add the rest
 
    NUM_STAGES
@@ -57,8 +60,11 @@ static const struct {
    [BYPASS_STAGE_ID]  = {"Render", "Rendering to system memory"},
    [BLIT_STAGE_ID]    = {"Blit", "Performing a Blit operation"},
    [COMPUTE_STAGE_ID] = {"Compute", "Compute job"},
-   [CLEAR_RESTORE_STAGE_ID] = {"Clear/Restore", "Clear (sysmem) or per-tile clear or restore (GMEM)"},
-   [RESOLVE_STAGE_ID] = {"Resolve", "Per tile resolve (GMEM to system memory"},
+   [CLEAR_SYSMEM_STAGE_ID] = {"Clear Sysmem", ""},
+   [CLEAR_GMEM_STAGE_ID] = {"Clear GMEM", "Per-tile (GMEM) clear"},
+   [GMEM_LOAD_STAGE_ID] = {"GMEM Load", "Per tile system memory to GMEM load"},
+   [GMEM_STORE_STAGE_ID] = {"GMEM Store", "Per tile GMEM to system memory store"},
+   [SYSMEM_RESOLVE_STAGE_ID] = {"SysMem Resolve", "System memory MSAA resolve"},
    // TODO add the rest
 };
 
index 150b6d2..cd261b5 100644 (file)
@@ -1060,6 +1060,8 @@ struct tu_cmd_buffer
    struct list_head pool_link;
 
    struct u_trace trace;
+   struct u_trace_iterator trace_renderpass_start;
+   struct u_trace_iterator trace_renderpass_end;
 
    VkCommandBufferUsageFlags usage_flags;
    VkCommandBufferLevel level;
index 9ec5f18..0017f1b 100644 (file)
@@ -88,6 +88,49 @@ Tracepoint('start_draw_ib_gmem',
 Tracepoint('end_draw_ib_gmem',
     tp_perfetto='tu_end_draw_ib_gmem')
 
+Tracepoint('start_gmem_clear',
+    tp_perfetto='tu_start_gmem_clear')
+Tracepoint('end_gmem_clear',
+    args=[Arg(type='enum VkFormat',  var='format',  c_format='%s', to_prim_type='vk_format_description({})->short_name'),
+          Arg(type='uint8_t',        var='samples', c_format='%u')],
+    tp_perfetto='tu_end_gmem_clear')
+
+Tracepoint('start_sysmem_clear',
+    tp_perfetto='tu_start_sysmem_clear')
+Tracepoint('end_sysmem_clear',
+    args=[Arg(type='enum VkFormat',  var='format',      c_format='%s', to_prim_type='vk_format_description({})->short_name'),
+          Arg(type='uint8_t',        var='uses_3d_ops', c_format='%u'),
+          Arg(type='uint8_t',        var='samples',     c_format='%u')],
+    tp_perfetto='tu_end_sysmem_clear')
+
+Tracepoint('start_sysmem_clear_all',
+    tp_perfetto='tu_start_sysmem_clear_all')
+Tracepoint('end_sysmem_clear_all',
+    args=[Arg(type='uint8_t',        var='mrt_count',   c_format='%u'),
+          Arg(type='uint8_t',        var='rect_count',  c_format='%u')],
+    tp_perfetto='tu_end_sysmem_clear_all')
+
+Tracepoint('start_gmem_load',
+    tp_perfetto='tu_start_gmem_load')
+Tracepoint('end_gmem_load',
+    args=[Arg(type='enum VkFormat',  var='format',   c_format='%s', to_prim_type='vk_format_description({})->short_name'),
+          Arg(type='uint8_t',        var='force_load', c_format='%u')],
+    tp_perfetto='tu_end_gmem_load')
+
+Tracepoint('start_gmem_store',
+    tp_perfetto='tu_start_gmem_store')
+Tracepoint('end_gmem_store',
+    args=[Arg(type='enum VkFormat',  var='format',   c_format='%s', to_prim_type='vk_format_description({})->short_name'),
+          Arg(type='uint8_t',        var='fast_path', c_format='%u'),
+          Arg(type='uint8_t',        var='unaligned', c_format='%u')],
+    tp_perfetto='tu_end_gmem_store')
+
+Tracepoint('start_sysmem_resolve',
+    tp_perfetto='tu_start_sysmem_resolve')
+Tracepoint('end_sysmem_resolve',
+    args=[Arg(type='enum VkFormat',  var='format',   c_format='%s', to_prim_type='vk_format_description({})->short_name')],
+    tp_perfetto='tu_end_sysmem_resolve')
+
 Tracepoint('start_blit',
     tp_perfetto='tu_start_blit',
 )