intel/ds: track number of tracepoint timestamp copies
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 29 Aug 2023 20:00:17 +0000 (23:00 +0300)
committer Marge Bot <emma+marge@anholt.net>
Mon, 25 Sep 2023 13:05:45 +0000 (13:05 +0000)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24744>

src/intel/ds/intel_driver_ds.cc
src/intel/ds/intel_driver_ds.h
src/intel/ds/intel_tracepoints.py
src/intel/vulkan/anv_utrace.c
src/intel/vulkan/genX_cmd_buffer.c

index 395df67..25f63b9 100644 (file)
@@ -68,8 +68,8 @@ static const struct {
       INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
    },
    {
-      "generate-draws",
-      INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS,
+      "internal-ops",
+      INTEL_DS_QUEUE_STAGE_INTERNAL_OPS,
    },
    {
       "stall",
@@ -391,12 +391,13 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
 CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
 CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
 CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
-CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS)
-CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_BLORP)
-CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_BLORP)
-CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
-CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
-CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_BLORP)
+CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
+CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
+CREATE_DUAL_EVENT_CALLBACK(trace_copy_cb, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
+CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
+CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
+CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
+CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
 
 void
 intel_ds_begin_cmd_buffer_annotation(struct intel_ds_device *device,
index bc5f4a8..456b154 100644 (file)
@@ -68,7 +68,7 @@ enum intel_ds_queue_stage {
    INTEL_DS_QUEUE_STAGE_QUEUE,
    INTEL_DS_QUEUE_STAGE_FRAME,
    INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
-   INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS,
+   INTEL_DS_QUEUE_STAGE_INTERNAL_OPS,
    INTEL_DS_QUEUE_STAGE_STALL,
    INTEL_DS_QUEUE_STAGE_COMPUTE,
    INTEL_DS_QUEUE_STAGE_RENDER_PASS,
index b11c789..2048a6b 100644 (file)
@@ -172,7 +172,11 @@ def define_tracepoints(args):
                  tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'])
 
     # Used to identify copies generated by utrace
-    begin_end_tp('trace_copy', end_pipelined=True)
+    begin_end_tp('trace_copy',
+                 tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),])
+    begin_end_tp('trace_copy_cb',
+                 tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),],
+                 need_cs_param=True)
 
     def flag_bits(args):
         bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')]
index 042d36c..166595a 100644 (file)
@@ -227,14 +227,19 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
       assert(queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER ||
              queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE);
       if (queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER) {
+
+         trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
+
          anv_genX(device->info, emit_so_memcpy_init)(&submit->memcpy_state,
                                                      device,
                                                      &submit->batch);
+         uint32_t num_traces = 0;
          for (uint32_t i = 0; i < cmd_buffer_count; i++) {
             if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
                intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace,
                                          &submit->ds, false);
             } else {
+               num_traces += cmd_buffers[i]->trace.num_traces;
                u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
                                     u_trace_end_iterator(&cmd_buffers[i]->trace),
                                     &submit->ds.trace,
@@ -244,8 +249,13 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
          }
          anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state);
 
+         trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
+                                       num_traces);
+
          anv_genX(device->info, emit_so_memcpy_end)(&submit->memcpy_state);
       } else {
+         trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
+
          submit->simple_state = (struct anv_simple_shader) {
             .device               = device,
             .dynamic_state_stream = &submit->dynamic_state_stream,
@@ -257,11 +267,14 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
          };
          anv_genX(device->info, emit_simple_shader_init)(&submit->simple_state);
 
+         uint32_t num_traces = 0;
          for (uint32_t i = 0; i < cmd_buffer_count; i++) {
             if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
                intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace,
                                          &submit->ds, false);
             } else {
+               /* Count only the traces cloned here, as the render path does. */
+               num_traces += cmd_buffers[i]->trace.num_traces;
                u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
                                     u_trace_end_iterator(&cmd_buffers[i]->trace),
                                     &submit->ds.trace,
@@ -270,6 +283,9 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
             }
          }
 
+         trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
+                                       num_traces);
+
          anv_genX(device->info, emit_simple_shader_end)(&submit->simple_state);
       }
 
index 71a5d02..92f59ba 100644 (file)
@@ -3794,9 +3794,11 @@ genX(CmdExecuteCommands)(
 
       struct anv_memcpy_state memcpy_state;
       genX(emit_so_memcpy_init)(&memcpy_state, device, &primary->batch);
+      uint32_t num_traces = 0;
       for (uint32_t i = 0; i < commandBufferCount; i++) {
          ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
 
+         num_traces += secondary->trace.num_traces;
          u_trace_clone_append(u_trace_begin_iterator(&secondary->trace),
                               u_trace_end_iterator(&secondary->trace),
                               &primary->trace,
@@ -3805,7 +3807,7 @@ genX(CmdExecuteCommands)(
       }
       genX(emit_so_memcpy_fini)(&memcpy_state);
 
-      trace_intel_end_trace_copy(&primary->trace);
+      trace_intel_end_trace_copy(&primary->trace, num_traces);
 
       /* Memcpy is done using the 3D pipeline. */
       primary->state.current_pipeline = _3D;