anv: add perfetto source
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Sun, 21 Nov 2021 16:23:57 +0000 (18:23 +0200)
committerMarge Bot <emma+marge@anholt.net>
Fri, 14 Jan 2022 20:17:44 +0000 (20:17 +0000)
v2: Increase custom stall data (Felix)
    Fixup build (Felix)

v3: Add API enum (Rohan)
    Fixup old comment (Rohan)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>

16 files changed:
docs/perfetto.rst
src/intel/ds/intel_driver_ds.cc [new file with mode: 0644]
src/intel/ds/intel_driver_ds.h [new file with mode: 0644]
src/intel/ds/intel_tracepoints.py [moved from src/intel/vulkan/anv_tracepoints.py with 70% similarity]
src/intel/ds/meson.build
src/intel/meson.build
src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/anv_queue.c
src/intel/vulkan/anv_utrace.c
src/intel/vulkan/anv_wsi.c
src/intel/vulkan/genX_blorp_exec.c
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/meson.build

index 689031d..c9b39ec 100644 (file)
@@ -161,6 +161,14 @@ set of HW counters :
 
    INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer
 
+Vulkan applications can also be instrumented to be Perfetto producers.
+To enable this for a given application, set the following environment
+variable:
+
+.. code-block:: console
+
+   PERFETTO_TRACE=1 my_vulkan_app
+
 Panfrost
 ^^^^^^^^
 
diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc
new file mode 100644 (file)
index 0000000..d8b1a69
--- /dev/null
@@ -0,0 +1,585 @@
+/*
+ * Copyright © 2021 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "common/intel_gem.h"
+#include "perf/intel_perf.h"
+
+#include "util/hash_table.h"
+#include "util/u_process.h"
+
+#include "intel_driver_ds.h"
+#include "intel_pps_priv.h"
+#include "intel_tracepoints.h"
+
+#ifdef HAVE_PERFETTO
+
+#include "util/u_perfetto.h"
+
+#include "intel_tracepoints_perfetto.h"
+
+/* Static description of each queue stage, indexed by enum
+ * intel_ds_queue_stage. The name is what send_descriptors() publishes for
+ * the stage.
+ */
+static const struct {
+   const char *name;
+
+   /* Tells us if a given stage is pipelined. This is meant to build stacks of
+    * pipelined elements so that the perfetto UI doesn't get confused by
+    * elements ending out of order. Not read anywhere in this file yet.
+    */
+   bool pipelined;
+
+   /* The perfetto UI requires that there is a parent-child relationship
+    * within a row of elements. Which means that all children elements must
+    * end within the lifespan of their parent.
+    *
+    * Some elements like stalls and command buffers follow that relationship,
+    * but not all. This tells us in which UI row the elements should live.
+    * Not read anywhere in this file yet.
+    */
+   enum intel_ds_queue_stage draw_stage;
+} intel_queue_stage_desc[INTEL_DS_QUEUE_STAGE_N_STAGES] = {
+   /* Order must match enum intel_ds_queue_stage! */
+   {
+      "cmd-buffer",
+      false,
+      INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
+   },
+   {
+      "stall",
+      false,
+      INTEL_DS_QUEUE_STAGE_STALL,
+   },
+   {
+      "compute",
+      true,
+      INTEL_DS_QUEUE_STAGE_COMPUTE,
+   },
+   {
+      "render-pass",
+      true,
+      INTEL_DS_QUEUE_STAGE_RENDER_PASS,
+   },
+   {
+      "blorp",
+      true,
+      INTEL_DS_QUEUE_STAGE_BLORP,
+   },
+   {
+      "draw",
+      true,
+      INTEL_DS_QUEUE_STAGE_DRAW,
+   },
+};
+
+/* Incremental state of the data source, obtained through
+ * TraceContext::GetIncrementalState(). was_cleared starts out true and is
+ * set to false by the emitters once the descriptors have been (re)sent.
+ */
+struct IntelRenderpassIncrementalState {
+   bool was_cleared{true};
+};
+
+/* Data source traits plugging the incremental state type above into the
+ * perfetto defaults.
+ */
+struct IntelRenderpassTraits : public perfetto::DefaultDataSourceTraits {
+   typedef IntelRenderpassIncrementalState IncrementalStateType;
+};
+
+class IntelRenderpassDataSource : public perfetto::DataSource<IntelRenderpassDataSource,
+                                                            IntelRenderpassTraits> {
+public:
+   void OnSetup(const SetupArgs &) override
+   {
+      // Nothing to configure yet. This callback is where custom configuration
+      // based on the TraceConfig in SetupArgs would be applied.
+   }
+
+   void OnStart(const StartArgs &) override
+   {
+      // This notification can be used to initialize the GPU driver, enable
+      // counters, etc. StartArgs contains the DataSourceDescriptor, which
+      // can be extended. Here we only call u_trace_perfetto_start().
+      u_trace_perfetto_start();
+      PERFETTO_LOG("Tracing started");
+   }
+
+   void OnStop(const StopArgs &) override
+   {
+      PERFETTO_LOG("Tracing stopped");
+
+      // Undo the u_trace_perfetto_start() done in OnStart.
+      u_trace_perfetto_stop();
+      // TODO we should perhaps block until queued traces are flushed?
+
+      Trace([](IntelRenderpassDataSource::TraceContext ctx) {
+         auto packet = ctx.NewTracePacket();
+         packet->Finalize();
+         ctx.Flush();
+      });
+   }
+};
+
+PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
+PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
+
+using perfetto::protos::pbzero::InternedGpuRenderStageSpecification_RenderStageCategory;
+
+/* Map an i915 engine class to a perfetto render stage category. Only the
+ * render engine is reported as GRAPHICS, every other class as OTHER.
+ */
+enum InternedGpuRenderStageSpecification_RenderStageCategory
+i915_engine_class_to_category(enum drm_i915_gem_engine_class engine_class)
+{
+   if (engine_class == I915_ENGINE_CLASS_RENDER) {
+      return InternedGpuRenderStageSpecification_RenderStageCategory::
+         InternedGpuRenderStageSpecification_RenderStageCategory_GRAPHICS;
+   }
+
+   return InternedGpuRenderStageSpecification_RenderStageCategory::InternedGpuRenderStageSpecification_RenderStageCategory_OTHER;
+}
+
+static void
+sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx,
+               struct intel_ds_device *device)
+{
+   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
+   uint64_t gpu_ts = intel_device_info_timebase_scale(&device->info,
+                                                      intel_read_gpu_timestamp(device->fd));
+
+   if (cpu_ts < device->next_clock_sync_ns)
+      return;
+
+   PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id);
+
+   device->sync_gpu_ts = gpu_ts;
+   device->next_clock_sync_ns = cpu_ts + 1000000000ull;
+
+   auto packet = ctx.NewTracePacket();
+
+   packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
+   packet->set_timestamp(cpu_ts);
+
+   auto event = packet->set_clock_snapshot();
+   {
+      auto clock = event->add_clocks();
+
+      clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
+      clock->set_timestamp(cpu_ts);
+   }
+   {
+      auto clock = event->add_clocks();
+
+      clock->set_clock_id(device->gpu_clock_id);
+      clock->set_timestamp(gpu_ts);
+   }
+}
+
+static void
+send_descriptors(IntelRenderpassDataSource::TraceContext &ctx,
+                 struct intel_ds_device *device)
+{
+   struct intel_ds_queue *queue;
+
+   PERFETTO_LOG("Sending renderstage descriptors");
+
+   device->event_id = 0;
+   u_vector_foreach(queue, &device->queues) {
+      for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) {
+         queue->stages[s].start_ns = 0;
+      }
+   }
+
+   {
+      auto packet = ctx.NewTracePacket();
+
+      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
+      packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
+      packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
+
+      auto interned_data = packet->set_interned_data();
+
+      {
+         auto desc = interned_data->add_graphics_contexts();
+         desc->set_iid(device->iid);
+         desc->set_pid(getpid());
+         switch (device->api) {
+         case INTEL_DS_API_OPENGL:
+            desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api_OPEN_GL);
+            break;
+         case INTEL_DS_API_VULKAN:
+            desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api_VULKAN);
+            break;
+         default:
+            break;
+         }
+      }
+
+      /* Emit all the IID picked at device/queue creation. */
+      u_vector_foreach(queue, &device->queues) {
+         for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
+            {
+               /* We put the stage number in there so that all rows are order
+                * by intel_ds_queue_stage.
+                */
+               char name[100];
+               snprintf(name, sizeof(name), "%.10s-%s-%u-%s",
+                        util_get_process_name(),
+                        queue->name, s, intel_queue_stage_desc[s].name);
+
+               auto desc = interned_data->add_gpu_specifications();
+               desc->set_iid(queue->stages[s].queue_iid);
+               desc->set_name(name);
+            }
+            {
+               auto desc = interned_data->add_gpu_specifications();
+               desc->set_iid(queue->stages[s].stage_iid);
+               desc->set_name(intel_queue_stage_desc[s].name);
+            }
+         }
+      }
+   }
+
+   device->next_clock_sync_ns = 0;
+   sync_timestamp(ctx, device);
+}
+
+/* Callback invoked by end_event() with the event and the tracepoint payload. */
+using trace_payload_as_extra_func =
+   void (*)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
+
+/* Record the start timestamp of a stage on a queue.
+ *
+ * If we haven't managed to calibrate the alignment between GPU and CPU
+ * timestamps yet, 0 is recorded instead so that the matching end_event()
+ * skips this trace, otherwise perfetto won't know what to do with it.
+ */
+static void
+begin_event(struct intel_ds_queue *queue, uint64_t ts_ns,
+            enum intel_ds_queue_stage stage_id)
+{
+   const bool clocks_synced = queue->device->sync_gpu_ts != 0;
+
+   queue->stages[stage_id].start_ns = clocks_synced ? ts_ns : 0;
+}
+
+static void
+end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
+          enum intel_ds_queue_stage stage_id,
+          uint32_t submission_id, const void* payload = nullptr,
+          trace_payload_as_extra_func payload_as_extra = nullptr)
+{
+   struct intel_ds_device *device = queue->device;
+
+   /* If we haven't managed to calibrate the alignment between GPU and CPU
+    * timestamps yet, then skip this trace, otherwise perfetto won't know
+    * what to do with it.
+    */
+   if (!device->sync_gpu_ts)
+      return;
+
+   struct intel_ds_stage *stage = &queue->stages[stage_id];
+   uint64_t start_ns = stage->start_ns;
+
+   if (!start_ns)
+      return;
+
+   uint64_t evt_id = device->event_id++;
+
+   IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
+      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
+         send_descriptors(tctx, queue->device);
+         state->was_cleared = false;
+      }
+
+      sync_timestamp(tctx, queue->device);
+
+      auto packet = tctx.NewTracePacket();
+
+      packet->set_timestamp(start_ns);
+      packet->set_timestamp_clock_id(queue->device->gpu_clock_id);
+
+      assert(ts_ns >= start_ns);
+
+      auto event = packet->set_gpu_render_stage_event();
+      event->set_gpu_id(queue->device->gpu_id);
+
+      event->set_hw_queue_iid(stage->queue_iid);
+      event->set_stage_iid(stage->stage_iid);
+      event->set_context(queue->device->iid);
+      event->set_event_id(evt_id);
+      event->set_duration(ts_ns - start_ns);
+      event->set_submission_id(submission_id);
+
+      if (payload && payload_as_extra) {
+         payload_as_extra(event, payload);
+      }
+   });
+
+   stage->start_ns = 0;
+}
+
+static void
+custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event,
+                                        const struct trace_intel_end_stall *payload)
+{
+   char buf[256];
+
+   {
+      auto data = event->add_extra_data();
+      data->set_name("stall_reason");
+
+      snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s : %s",
+              (payload->flags & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
+              (payload->flags & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
+              (payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
+              (payload->flags & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
+              (payload->flags & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
+              (payload->flags & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
+              (payload->flags & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
+              (payload->flags & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
+              (payload->flags & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
+              (payload->flags & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
+              (payload->flags & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
+              (payload->flags & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
+              (payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
+              (payload->flags & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
+              payload->reason ? payload->reason : "unknown");
+
+      assert(strlen(buf) > 0);
+
+      data->set_value(buf);
+   }
+}
+
+#endif /* HAVE_PERFETTO */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_PERFETTO
+
+/*
+ * Trace callbacks, called from u_trace once the timestamps from GPU have been
+ * collected.
+ *
+ * CREATE_DUAL_EVENT_CALLBACK(event_name, stage) defines the pair
+ * intel_ds_begin_<event_name>() / intel_ds_end_<event_name>(). Both expect
+ * flush_data to point to a struct intel_ds_flush_data. The begin callback
+ * records the start timestamp of the stage on the flush's queue through
+ * begin_event(); the end callback emits the event through end_event(),
+ * handing it the payload together with
+ * trace_payload_as_extra_intel_end_<event_name>() (not defined in this file,
+ * presumably generated into intel_tracepoints_perfetto.h).
+ */
+
+#define CREATE_DUAL_EVENT_CALLBACK(event_name, stage)                   \
+   void                                                                 \
+   intel_ds_begin_##event_name(struct intel_ds_device *device,          \
+                               uint64_t ts_ns,                          \
+                               const void *flush_data,                  \
+                               const struct trace_intel_begin_##event_name *payload) \
+   {                                                                    \
+      const struct intel_ds_flush_data *flush =                         \
+         (const struct intel_ds_flush_data *) flush_data;               \
+      begin_event(flush->queue, ts_ns, stage);                          \
+   }                                                                    \
+                                                                        \
+   void                                                                 \
+   intel_ds_end_##event_name(struct intel_ds_device *device,            \
+                             uint64_t ts_ns,                            \
+                             const void *flush_data,                    \
+                             const struct trace_intel_end_##event_name *payload) \
+   {                                                                    \
+      const struct intel_ds_flush_data *flush =                         \
+         (const struct intel_ds_flush_data *) flush_data;               \
+      end_event(flush->queue, ts_ns, stage, flush->submission_id,       \
+                payload,                                                \
+                (trace_payload_as_extra_func)                           \
+                &trace_payload_as_extra_intel_end_##event_name);        \
+   }                                                                    \
+
+
+CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
+CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
+CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP)
+CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_indexed_multi, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_multi, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_indirect_byte_count, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
+CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
+
+/* u_trace callback for the start of a stall: records the start timestamp of
+ * the stall stage on the queue referenced by flush_data (a struct
+ * intel_ds_flush_data). device and payload are unused.
+ */
+void
+intel_ds_begin_stall(struct intel_ds_device *device,
+                     uint64_t ts_ns,
+                     const void *flush_data,
+                     const struct trace_intel_begin_stall *payload)
+{
+   const auto *flush =
+      static_cast<const struct intel_ds_flush_data *>(flush_data);
+
+   begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL);
+}
+
+void
+intel_ds_end_stall(struct intel_ds_device *device,
+                   uint64_t ts_ns,
+                   const void *flush_data,
+                   const struct trace_intel_end_stall *payload)
+{
+   const struct intel_ds_flush_data *flush =
+      (const struct intel_ds_flush_data *) flush_data;
+   end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL, flush->submission_id,
+             payload,
+             (trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall);
+}
+
+/* Returns the current boottime in nanoseconds, to be handed back to
+ * intel_ds_end_submit() as start_ts. queue is unused.
+ */
+uint64_t
+intel_ds_begin_submit(struct intel_ds_queue *queue)
+{
+   const uint64_t submit_start_ns = perfetto::base::GetBootTimeNs().count();
+
+   return submit_start_ns;
+}
+
+void
+intel_ds_end_submit(struct intel_ds_queue *queue,
+                    uint64_t start_ts)
+{
+   if (!u_trace_context_actively_tracing(&queue->device->trace_context)) {
+      queue->device->sync_gpu_ts = 0;
+      queue->device->next_clock_sync_ns = 0;
+      return;
+   }
+
+   uint64_t end_ts = perfetto::base::GetBootTimeNs().count();
+   uint32_t submission_id = queue->submission_id++;
+
+   IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
+      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
+         send_descriptors(tctx, queue->device);
+         state->was_cleared = false;
+      }
+
+      sync_timestamp(tctx, queue->device);
+
+      auto packet = tctx.NewTracePacket();
+
+      packet->set_timestamp(start_ts);
+
+      auto event = packet->set_vulkan_api_event();
+      auto submit = event->set_vk_queue_submit();
+
+      // submit->set_pid(os_get_pid());
+      // submit->set_tid(os_get_tid());
+      submit->set_duration_ns(end_ts - start_ts);
+      submit->set_vk_queue((uintptr_t) queue);
+      submit->set_submission_id(submission_id);
+   });
+}
+
+#endif /* HAVE_PERFETTO */
+
+/* One-time initialization: with perfetto support, initializes perfetto and
+ * registers the "gpu.renderstages.intel" data source. Does nothing
+ * otherwise.
+ */
+static void
+intel_driver_ds_init_once(void)
+{
+#ifdef HAVE_PERFETTO
+   util_perfetto_init();
+
+   perfetto::DataSourceDescriptor descriptor;
+   descriptor.set_name("gpu.renderstages.intel");
+   IntelRenderpassDataSource::Register(descriptor);
+#endif
+}
+
+static once_flag intel_driver_ds_once_flag = ONCE_FLAG_INIT;
+
+/* Hands out identifiers from a counter shared by the whole file, starting
+ * at 1. No locking is done here.
+ */
+static uint64_t get_iid()
+{
+   static uint64_t next_iid = 1;
+   const uint64_t iid = next_iid;
+
+   next_iid += 1;
+   return iid;
+}
+
+/* Runs intel_driver_ds_init_once() the first time this is called; later
+ * calls are no-ops.
+ */
+void
+intel_driver_ds_init(void)
+{
+   call_once(&intel_driver_ds_once_flag, intel_driver_ds_init_once);
+}
+
+void
+intel_ds_device_init(struct intel_ds_device *device,
+                     struct intel_device_info *devinfo,
+                     int drm_fd,
+                     uint32_t gpu_id,
+                     enum intel_ds_api api)
+{
+   memset(device, 0, sizeof(*device));
+
+   assert(gpu_id < 128);
+   device->gpu_id = gpu_id;
+   device->gpu_clock_id = intel_pps_clock_id(gpu_id);
+   device->fd = drm_fd;
+   device->info = *devinfo;
+   device->iid = get_iid();
+   device->api = api;
+   u_vector_init(&device->queues, 4, sizeof(struct intel_ds_queue));
+}
+
+/* Tear down a device: finishes its u_trace context, then releases the queue
+ * vector. The trace context is not initialized by intel_ds_device_init(), so
+ * it must have been set up elsewhere before this is called.
+ */
+void
+intel_ds_device_fini(struct intel_ds_device *device)
+{
+   u_trace_context_fini(&device->trace_context);
+   u_vector_finish(&device->queues);
+}
+
+struct intel_ds_queue *
+intel_ds_device_add_queue(struct intel_ds_device *device,
+                          const char *fmt_name,
+                          ...)
+{
+   struct intel_ds_queue *queue =
+      (struct intel_ds_queue *) u_vector_add(&device->queues);
+   va_list ap;
+
+   memset(queue, 0, sizeof(*queue));
+
+   queue->device = device;
+   queue->queue_id = u_vector_length(&device->queues) - 1;
+
+   va_start(ap, fmt_name);
+   vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap);
+   va_end(ap);
+
+   for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
+      queue->stages[s].queue_iid = get_iid();
+      queue->stages[s].stage_iid = get_iid();
+   }
+
+   return queue;
+}
+
+/* Initialize flush data for a submission on a queue: zeroes it, records the
+ * queue and submission id, and initializes its u_trace against the trace
+ * context of the queue's device.
+ */
+void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
+                              struct intel_ds_queue *queue,
+                              uint64_t submission_id)
+{
+   memset(data, 0, sizeof(*data));
+
+   data->submission_id = submission_id;
+   data->queue = queue;
+
+   u_trace_init(&data->trace, &queue->device->trace_context);
+}
+
+/* Counterpart of intel_ds_flush_data_init(): finishes the embedded u_trace. */
+void intel_ds_flush_data_fini(struct intel_ds_flush_data *data)
+{
+   u_trace_fini(&data->trace);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/intel/ds/intel_driver_ds.h b/src/intel/ds/intel_driver_ds.h
new file mode 100644 (file)
index 0000000..8b833d1
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Copyright © 2021 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INTEL_DRIVER_DS_H
+#define INTEL_DRIVER_DS_H
+
+#include <stdint.h>
+
+#include "util/macros.h"
+#include "util/perf/u_trace.h"
+#include "util/u_vector.h"
+
+#include "dev/intel_device_info.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum intel_ds_api {
+   INTEL_DS_API_OPENGL, /* reported to perfetto as InternedGraphicsContext_Api_OPEN_GL */
+   INTEL_DS_API_VULKAN, /* reported to perfetto as InternedGraphicsContext_Api_VULKAN */
+};
+
+enum intel_ds_stall_flag {
+   INTEL_DS_DEPTH_CACHE_FLUSH_BIT         = BITFIELD_BIT(0),  /* "+depth_flush" in stall_reason */
+   INTEL_DS_DATA_CACHE_FLUSH_BIT          = BITFIELD_BIT(1),  /* "+dc_flush" */
+   INTEL_DS_HDC_PIPELINE_FLUSH_BIT        = BITFIELD_BIT(2),  /* "+hdc_flush" */
+   INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT = BITFIELD_BIT(3),  /* "+rt_flush" */
+   INTEL_DS_TILE_CACHE_FLUSH_BIT          = BITFIELD_BIT(4),  /* "+tile_flush" */
+   INTEL_DS_STATE_CACHE_INVALIDATE_BIT    = BITFIELD_BIT(5),  /* "+state_inv" */
+   INTEL_DS_CONST_CACHE_INVALIDATE_BIT    = BITFIELD_BIT(6),  /* "+const_inv" */
+   INTEL_DS_VF_CACHE_INVALIDATE_BIT       = BITFIELD_BIT(7),  /* "+vf_inv" */
+   INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT  = BITFIELD_BIT(8),  /* "+tex_inv" */
+   INTEL_DS_INST_CACHE_INVALIDATE_BIT     = BITFIELD_BIT(9),  /* "+inst_inv" */
+   INTEL_DS_STALL_AT_SCOREBOARD_BIT       = BITFIELD_BIT(10), /* "+pb_stall" */
+   INTEL_DS_DEPTH_STALL_BIT               = BITFIELD_BIT(11), /* "+depth_stall" */
+   INTEL_DS_CS_STALL_BIT                  = BITFIELD_BIT(12), /* "+cs_stall" */
+};
+
+/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */
+typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags);
+
+enum intel_ds_queue_stage {
+   INTEL_DS_QUEUE_STAGE_CMD_BUFFER,  /* "cmd-buffer" */
+   INTEL_DS_QUEUE_STAGE_STALL,       /* "stall" */
+   INTEL_DS_QUEUE_STAGE_COMPUTE,     /* "compute" */
+   INTEL_DS_QUEUE_STAGE_RENDER_PASS, /* "render-pass" */
+   INTEL_DS_QUEUE_STAGE_BLORP,       /* "blorp" */
+   INTEL_DS_QUEUE_STAGE_DRAW,        /* "draw" */
+   INTEL_DS_QUEUE_STAGE_N_STAGES,    /* number of stages; the order above must match intel_queue_stage_desc in intel_driver_ds.cc */
+};
+
+struct intel_ds_device {
+   struct intel_device_info info;
+
+   /* DRM fd */
+   int fd;
+
+   /* API of this device */
+   enum intel_ds_api api;
+
+   /* GPU identifier (minor number) */
+   uint32_t gpu_id;
+
+   /* Clock identifier for this device. */
+   uint32_t gpu_clock_id;
+
+   /* The GPU timestamp at the point where we last emitted the clock_sync.
+    * This will be a *later* timestamp than the first GPU traces (since
+    * we capture the first clock_sync from the CPU *after* the first GPU
+    * tracepoints happen).  To avoid confusing perfetto we need to drop
+    * the GPU traces until a clock_sync has been emitted.
+    *
+    * NOTE(review): intel_driver_ds.cc only tests this for being non-zero,
+    * it does not compare trace timestamps against it.
+    */
+   uint64_t sync_gpu_ts;
+
+   /* Next (CPU boottime) timestamp after which we should resend a clock
+    * correlation. 0 forces a resend.
+    */
+   uint64_t next_clock_sync_ns;
+
+   /* Unique perfetto identifier for the context */
+   uint64_t iid;
+
+   /* Event ID generator, restarted from 0 each time descriptors are sent */
+   uint64_t event_id;
+
+   struct u_trace_context trace_context;
+
+   /* List of intel_ds_queue */
+   struct u_vector queues;
+};
+
+struct intel_ds_stage {
+   /* Unique hw_queue IID */
+   uint64_t queue_iid;
+
+   /* Unique stage IID */
+   uint64_t stage_iid;
+
+   /* Start timestamp of the last work element, 0 when there is none in
+    * flight (or when it should be dropped).
+    */
+   uint64_t start_ns;
+};
+
+struct intel_ds_queue {
+   /* Device this queue belongs to */
+   struct intel_ds_device *device;
+
+   /* Unique queue ID across the device (index in intel_ds_device::queues) */
+   uint32_t queue_id;
+
+   /* Name of the queue, formatted by intel_ds_device_add_queue() */
+   char name[80];
+
+   /* Counter incremented on each intel_ds_end_submit() call made while
+    * actively tracing
+    */
+   uint64_t submission_id;
+
+   struct intel_ds_stage stages[INTEL_DS_QUEUE_STAGE_N_STAGES]; /* indexed by enum intel_ds_queue_stage */
+};
+
+struct intel_ds_flush_data {
+   struct intel_ds_queue *queue; /* queue the traces are reported against */
+
+   /* u_trace element in which we copy other traces in case we deal with
+    * reusable command buffers.
+    */
+   struct u_trace trace;
+
+   /* Submission ID associated with the trace, forwarded to the emitted
+    * events.
+    */
+   uint64_t submission_id;
+};
+
+void intel_driver_ds_init(void);
+
+void intel_ds_device_init(struct intel_ds_device *device,
+                          struct intel_device_info *devinfo,
+                          int drm_fd,
+                          uint32_t gpu_id,
+                          enum intel_ds_api api);
+void intel_ds_device_fini(struct intel_ds_device *device);
+
+struct intel_ds_queue *intel_ds_device_add_queue(struct intel_ds_device *device,
+                                                 const char *fmt_name,
+                                                 ...);
+
+void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
+                              struct intel_ds_queue *queue,
+                              uint64_t submission_id);
+
+void intel_ds_flush_data_fini(struct intel_ds_flush_data *data);
+
+#ifdef HAVE_PERFETTO
+
+uint64_t intel_ds_begin_submit(struct intel_ds_queue *queue);
+void intel_ds_end_submit(struct intel_ds_queue *queue,
+                         uint64_t start_ts);
+
+#else
+
+/* Stub used when HAVE_PERFETTO is not defined: always returns 0. */
+static inline uint64_t intel_ds_begin_submit(struct intel_ds_queue *queue)
+{
+   (void) queue;
+   return 0;
+}
+
+/* Stub used when HAVE_PERFETTO is not defined: does nothing. */
+static inline void intel_ds_end_submit(struct intel_ds_queue *queue,
+                                       uint64_t start_ts)
+{
+   (void) queue;
+   (void) start_ts;
+}
+
+#endif /* HAVE_PERFETTO */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* INTEL_DRIVER_DS_H */
similarity index 70%
rename from src/intel/vulkan/anv_tracepoints.py
rename to src/intel/ds/intel_tracepoints.py
index ef9a373..9a776a2 100644 (file)
@@ -34,14 +34,18 @@ def define_tracepoints(args):
     from u_trace import TracepointArg as Arg
     from u_trace import TracepointArgStruct as ArgStruct
 
-    Header('anv_private.h', scope=HeaderScope.SOURCE)
+    Header('intel_driver_ds.h', scope=HeaderScope.SOURCE)
     Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER)
+    Header('ds/intel_driver_ds.h', scope=HeaderScope.HEADER)
 
-    def begin_end_tp(name, tp_args=[], tp_struct=None, end_pipelined=True):
-        Tracepoint('begin_{0}'.format(name))
-        Tracepoint('end_{0}'.format(name),
+    def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, end_pipelined=True):
+        Tracepoint('intel_begin_{0}'.format(name),
+                   tp_perfetto='intel_ds_begin_{0}'.format(name))
+        Tracepoint('intel_end_{0}'.format(name),
                    args=tp_args,
                    tp_struct=tp_struct,
+                   tp_perfetto='intel_ds_end_{0}'.format(name),
+                   tp_print=tp_print,
                    end_of_pipe=end_pipelined)
 
 
@@ -73,8 +77,8 @@ def define_tracepoints(args):
                         Arg(type='uint32_t', name='height', var='height', c_format='%u'),
                             Arg(type='enum isl_aux_op', name='hiz_op', var='hiz_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'),
                             Arg(type='enum isl_aux_op', name='fast_clear_op', var='fast_clear_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'),
-                            Arg(type='enum blorp_shader_type', name='type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'),
-                            Arg(type='enum blorp_shader_pipeline', name='pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),])
+                            Arg(type='enum blorp_shader_type', name='blorp_type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'),
+                            Arg(type='enum blorp_shader_pipeline', name='blorp_pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),])
 
     begin_end_tp('draw',
                  tp_args=[ArgStruct(type='uint32_t', var='count'),],
@@ -110,42 +114,58 @@ def define_tracepoints(args):
                           ArgStruct(type='uint32_t', var='group_z'),],
                  tp_struct=[Arg(type='uint32_t', name='group_x', var='group_x', c_format='%u'),
                             Arg(type='uint32_t', name='group_y', var='group_y', c_format='%u'),
-                            Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),])
+                            Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),],
+                 tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'])
+
+    def flag_bits(args):
+        bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')]
+        for a in args:
+            bits.append(Arg(type='bool', name=a[1], var='__entry->flags & INTEL_DS_{0}_BIT'.format(a[0]), c_format='%u'))
+        return bits
 
     def stall_args(args):
         fmt = ''
         exprs = []
         for a in args:
             fmt += '%s'
-            exprs.append('(__entry->flags & ANV_PIPE_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
+            exprs.append('(__entry->flags & INTEL_DS_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
+        fmt += ' : %s'
+        exprs.append('__entry->reason ? __entry->reason : "unknown"')
         fmt = [fmt]
         fmt += exprs
         return fmt
 
-    Tracepoint('stall',
-               args=[ArgStruct(type='uint32_t', var='flags'),],
-               tp_struct=[Arg(type='uint32_t', name='flags', var='flags', c_format='0x%x'),],
-               tp_print=stall_args([['DEPTH_CACHE_FLUSH', 'depth_flush'],
-                                    ['DATA_CACHE_FLUSH', 'dc_flush'],
-                                    ['HDC_PIPELINE_FLUSH', 'hdc_flush'],
-                                    ['RENDER_TARGET_CACHE_FLUSH', 'rt_flush'],
-                                    ['TILE_CACHE_FLUSH', 'tile_flush'],
-                                    ['STATE_CACHE_INVALIDATE', 'state_inval'],
-                                    ['CONSTANT_CACHE_INVALIDATE', 'const_inval'],
-                                    ['VF_CACHE_INVALIDATE', 'vf_inval'],
-                                    ['TEXTURE_CACHE_INVALIDATE', 'tex_inval'],
-                                    ['INSTRUCTION_CACHE_INVALIDATE', 'ic_inval'],
-                                    ['STALL_AT_SCOREBOARD', 'pb_stall'],
-                                    ['DEPTH_STALL', 'depth_stall'],
-                                    ['CS_STALL', 'cs_stall'],
-                                    ]))
-
+    stall_flags = [['DEPTH_CACHE_FLUSH',         'depth_flush'],
+                   ['DATA_CACHE_FLUSH',          'dc_flush'],
+                   ['HDC_PIPELINE_FLUSH',        'hdc_flush'],
+                   ['RENDER_TARGET_CACHE_FLUSH', 'rt_flush'],
+                   ['TILE_CACHE_FLUSH',          'tile_flush'],
+                   ['STATE_CACHE_INVALIDATE',    'state_inval'],
+                   ['CONST_CACHE_INVALIDATE',    'const_inval'],
+                   ['VF_CACHE_INVALIDATE',       'vf_inval'],
+                   ['TEXTURE_CACHE_INVALIDATE',  'tex_inval'],
+                   ['INST_CACHE_INVALIDATE',     'ic_inval'],
+                   ['STALL_AT_SCOREBOARD',       'pb_stall'],
+                   ['DEPTH_STALL',               'depth_stall'],
+                   ['CS_STALL',                  'cs_stall']]
+
+    begin_end_tp('stall',
+                 tp_args=[ArgStruct(type='uint32_t', var='flags'),
+                          ArgStruct(type='intel_ds_stall_cb_t', var='decode_cb'),
+                          ArgStruct(type='const char *', var='reason'),],
+                 tp_struct=[Arg(type='uint32_t', name='flags', var='decode_cb(flags)', c_format='0x%x'),
+                            Arg(type='const char *', name='reason', var='reason', c_format='%s'),],
+                 tp_print=stall_args(stall_flags),
+                 end_pipelined=False)
 
 
 def generate_code(args):
     from u_trace import utrace_generate
+    from u_trace import utrace_generate_perfetto_utils
 
-    utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param='struct anv_device *dev')
+    utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr,
+                    ctx_param='struct intel_ds_device *dev')
+    utrace_generate_perfetto_utils(hpath=args.perfetto_hdr)
 
 
 def main():
@@ -153,6 +173,7 @@ def main():
     parser.add_argument('-p', '--import-path', required=True)
     parser.add_argument('--utrace-src', required=True)
     parser.add_argument('--utrace-hdr', required=True)
+    parser.add_argument('--perfetto-hdr', required=True)
     args = parser.parse_args()
     sys.path.insert(0, args.import_path)
     define_tracepoints(args)
index b518bb7..15a4cd9 100644 (file)
@@ -3,30 +3,76 @@
 #
 # SPDX-License-Identifier: MIT
 
-pps_intel_sources = [
-  'intel_pps_perf.cc',
-  'intel_pps_driver.cc',
+intel_tracepoint_files = custom_target(
+  'intel_tracepoints.[ch]',
+  input : 'intel_tracepoints.py',
+  output : ['intel_tracepoints.h',
+            'intel_tracepoints_perfetto.h',
+            'intel_tracepoints.c'],
+  command : [
+    prog_python, '@INPUT@',
+    '-p', join_paths(meson.source_root(), 'src/util/perf/'),
+    '--utrace-hdr', '@OUTPUT0@',
+    '--perfetto-hdr', '@OUTPUT1@',
+    '--utrace-src', '@OUTPUT2@',
+  ],
+  depend_files : u_trace_py,
+)
+
+libintel_driver_ds_deps = [
+  idep_mesautil,
+  idep_nir_headers,
 ]
 
+if with_perfetto
+  libintel_driver_ds_deps += dep_perfetto
+endif
 
-pps_intel_lib = static_library(
-  'pps-intel',
-  sources: pps_intel_sources,
-  include_directories: [inc_tool, inc_src, inc_include, inc_intel],
-  link_with: [libintel_perf, libintel_dev],
-  dependencies: [dep_perfetto, dep_libdrm, idep_mesautil],
-  cpp_args: '-std=c++17'
+idep_intel_driver_ds_headers = declare_dependency(
+  sources : intel_tracepoint_files[0],
+  include_directories : [inc_include, inc_mapi, inc_mesa, inc_intel],
 )
 
-compile_args_pps_intel = ['-DPPS_INTEL']
+libintel_driver_ds = static_library(
+  'intel-driver-ds',
+  sources : ['intel_driver_ds.cc', intel_tracepoint_files],
+  include_directories : [inc_src, inc_include, inc_intel, inc_mapi, inc_mesa],
+  link_with : [libintel_perf, libintel_dev],
+  dependencies : libintel_driver_ds_deps,
+  cpp_args : '-std=c++17',
+  gnu_symbol_visibility : 'hidden',
+)
 
-pps_intel_dep = declare_dependency(
-  link_with: pps_intel_lib,
-  include_directories: [inc_tool, inc_include],
-  compile_args: compile_args_pps_intel,
+idep_intel_driver_ds = declare_dependency(
+  link_with : libintel_driver_ds,
+  include_directories : [inc_include, inc_mapi, inc_mesa, inc_intel],
 )
 
-pps_datasources += pps_intel_dep
-if not with_datasources.contains('intel')
-  with_datasources += 'intel'
+if with_perfetto and (with_datasources.contains('intel') or with_datasources.contains('auto'))
+  pps_intel_sources = files(
+    'intel_pps_perf.cc',
+    'intel_pps_driver.cc',
+  )
+
+  pps_intel_lib = static_library(
+    'pps-intel',
+    sources: pps_intel_sources,
+    include_directories: [inc_tool, inc_src, inc_include, inc_intel],
+    link_with: [libintel_perf, libintel_dev],
+    dependencies: [dep_perfetto, dep_libdrm, idep_mesautil],
+    cpp_args: '-std=c++17'
+  )
+
+  compile_args_pps_intel = ['-DPPS_INTEL']
+
+  pps_intel_dep = declare_dependency(
+    link_with: pps_intel_lib,
+    include_directories: [inc_tool, inc_include],
+    compile_args: compile_args_pps_intel,
+  )
+
+  pps_datasources += pps_intel_dep
+  if not with_datasources.contains('intel')
+    with_datasources += 'intel'
+  endif
 endif
index 4389de5..5d17755 100644 (file)
@@ -28,6 +28,7 @@ subdir('isl')
 subdir('common')
 subdir('compiler')
 subdir('perf')
+subdir('ds')
 if with_intel_tools
   subdir('tools')
 endif
@@ -37,6 +38,3 @@ endif
 if with_intel_vk
   subdir('vulkan')
 endif
-if with_perfetto and (with_datasources.contains('intel') or with_datasources.contains('auto'))
-  subdir('ds')
-endif
index 85062f1..1567732 100644 (file)
@@ -2374,10 +2374,15 @@ anv_queue_submit(struct vk_queue *vk_queue,
       return VK_SUCCESS;
    }
 
+   uint64_t start_ts = intel_ds_begin_submit(queue->ds);
+
    pthread_mutex_lock(&device->mutex);
    result = anv_queue_submit_locked(queue, submit);
+   /* Take submission ID under lock */
    pthread_mutex_unlock(&device->mutex);
 
+   intel_ds_end_submit(queue->ds, start_ts);
+
    return result;
 }
 
index d404b3a..d197f3e 100644 (file)
@@ -302,7 +302,7 @@ static VkResult anv_create_cmd_buffer(
 
    anv_measure_init(cmd_buffer);
 
-   u_trace_init(&cmd_buffer->trace, &device->trace_context);
+   u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);
 
    *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
 
@@ -407,7 +407,7 @@ anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer)
    anv_measure_reset(cmd_buffer);
 
    u_trace_fini(&cmd_buffer->trace);
-   u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context);
+   u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context);
 
    return VK_SUCCESS;
 }
index a11f35f..f3dad23 100644 (file)
@@ -1126,6 +1126,8 @@ VkResult anv_CreateInstance(
 
    anv_init_dri_options(instance);
 
+   intel_driver_ds_init();
+
    *pInstance = anv_instance_to_handle(instance);
 
    return VK_SUCCESS;
index 5e028f1..e2ae395 100644 (file)
@@ -52,6 +52,7 @@
 #include "blorp/blorp.h"
 #include "compiler/brw_compiler.h"
 #include "compiler/brw_rt.h"
+#include "ds/intel_driver_ds.h"
 #include "util/bitset.h"
 #include "util/bitscan.h"
 #include "util/macros.h"
@@ -1061,7 +1062,11 @@ struct anv_queue {
 
    const struct anv_queue_family *           family;
 
+   uint32_t                                  index_in_family;
+
    uint32_t                                  exec_flags;
+
+   struct intel_ds_queue *                   ds;
 };
 
 struct anv_pipeline_cache {
@@ -1216,7 +1221,7 @@ struct anv_device {
 
     struct intel_debug_block_frame              *debug_frame_desc;
 
-    struct u_trace_context                      trace_context;
+    struct intel_ds_device                       ds;
 };
 
 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
@@ -2395,6 +2400,9 @@ enum anv_pipe_bits {
    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
 
+enum intel_ds_stall_flag
+anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
+
 static inline enum anv_pipe_bits
 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
                                      VkAccessFlags2KHR flags)
@@ -4549,16 +4557,25 @@ struct anv_memcpy_state {
 };
 
 struct anv_utrace_flush_copy {
-   struct u_trace trace;
+   /* Needs to be the first field */
+   struct intel_ds_flush_data ds;
 
+   /* Batch state used to implement the copy of timestamps recorded in
+    * another buffer.
+    */
    struct anv_reloc_list relocs;
    struct anv_batch batch;
    struct anv_bo *batch_bo;
 
+   /* Buffer of 64-bit timestamps */
    struct anv_bo *trace_bo;
 
+   /* Syncobj to be signaled when the batch completes */
    struct vk_sync *sync;
 
+   /* Queue on which all the recorded traces are submitted */
+   struct anv_queue *queue;
+
    struct anv_memcpy_state memcpy_state;
 };
 
@@ -4570,6 +4587,25 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                     struct anv_cmd_buffer **cmd_buffers,
                                     struct anv_utrace_flush_copy **out_flush_data);
 
+#ifdef HAVE_PERFETTO
+void anv_perfetto_init(void);
+uint64_t anv_perfetto_begin_submit(struct anv_queue *queue);
+void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id,
+                             uint64_t start_ts);
+#else
+static inline void anv_perfetto_init(void)
+{
+}
+static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue)
+{
+   return 0;
+}
+static inline void anv_perfetto_end_submit(struct anv_queue *queue,
+                                           uint32_t submission_id,
+                                           uint64_t start_ts)
+{}
+#endif
+
 
 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
    VK_FROM_HANDLE(__anv_type, __name, __handle)
index ccf799e..93d835c 100644 (file)
@@ -48,6 +48,8 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue,
    assert(queue->vk.queue_family_index < pdevice->queue.family_count);
    queue->family = &pdevice->queue.families[queue->vk.queue_family_index];
 
+   queue->index_in_family = index_in_family;
+
    queue->exec_flags = exec_flags;
 
    return VK_SUCCESS;
index 349fa55..8fbabbb 100644 (file)
@@ -31,7 +31,7 @@ command_buffers_count_utraces(struct anv_device *device,
                               struct anv_cmd_buffer **cmd_buffers,
                               uint32_t *utrace_copies)
 {
-   if (!u_trace_context_actively_tracing(&device->trace_context))
+   if (!u_trace_context_actively_tracing(&device->ds.trace_context))
       return 0;
 
    uint32_t utraces = 0;
@@ -51,10 +51,10 @@ anv_utrace_delete_flush_data(struct u_trace_context *utctx,
                              void *flush_data)
 {
    struct anv_device *device =
-      container_of(utctx, struct anv_device, trace_context);
+      container_of(utctx, struct anv_device, ds.trace_context);
    struct anv_utrace_flush_copy *flush = flush_data;
 
-   u_trace_fini(&flush->trace);
+   intel_ds_flush_data_fini(&flush->ds);
 
    if (flush->trace_bo) {
       assert(flush->batch_bo);
@@ -76,7 +76,7 @@ anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
                                       uint32_t count)
 {
    struct anv_device *device =
-      container_of(utctx, struct anv_device, trace_context);
+      container_of(utctx, struct anv_device, ds.trace_context);
    struct anv_utrace_flush_copy *flush = cmdstream;
    struct anv_address from_addr = (struct anv_address) {
       .bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
@@ -111,7 +111,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
    if (!flush)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   u_trace_init(&flush->trace, &device->trace_context);
+   intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
 
    result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
                            0, 0, &flush->sync);
@@ -155,14 +155,14 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
          } else {
             u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
                                  u_trace_end_iterator(&cmd_buffers[i]->trace),
-                                 &flush->trace,
+                                 &flush->ds.trace,
                                  flush,
                                  anv_device_utrace_emit_copy_ts_buffer);
          }
       }
       anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
 
-      u_trace_flush(&flush->trace, flush, true);
+      u_trace_flush(&flush->ds.trace, flush, true);
 
       if (flush->batch.status != VK_SUCCESS) {
          result = flush->batch.status;
@@ -175,6 +175,8 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
       }
    }
 
+   flush->queue = queue;
+
    *out_flush_data = flush;
 
    return VK_SUCCESS;
@@ -196,7 +198,7 @@ static void *
 anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
 {
    struct anv_device *device =
-      container_of(utctx, struct anv_device, trace_context);
+      container_of(utctx, struct anv_device, ds.trace_context);
 
    struct anv_bo *bo = NULL;
    UNUSED VkResult result =
@@ -211,7 +213,7 @@ static void
 anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
 {
    struct anv_device *device =
-      container_of(utctx, struct anv_device, trace_context);
+      container_of(utctx, struct anv_device, ds.trace_context);
    struct anv_bo *bo = timestamps;
 
    anv_device_release_bo(device, bo);
@@ -237,7 +239,7 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
                    void *timestamps, unsigned idx, void *flush_data)
 {
    struct anv_device *device =
-      container_of(utctx, struct anv_device, trace_context);
+      container_of(utctx, struct anv_device, ds.trace_context);
    struct anv_bo *bo = timestamps;
    struct anv_utrace_flush_copy *flush = flush_data;
 
@@ -261,19 +263,80 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
    return intel_device_info_timebase_scale(&device->info, ts[idx]);
 }
 
+static const char *
+queue_family_to_name(const struct anv_queue_family *family)
+{
+   switch (family->engine_class) {
+   case I915_ENGINE_CLASS_RENDER:
+      return "render";
+   case I915_ENGINE_CLASS_COPY:
+      return "copy";
+   case I915_ENGINE_CLASS_VIDEO:
+      return "video";
+   case I915_ENGINE_CLASS_VIDEO_ENHANCE:
+      return "video-enh";
+   default:
+      return "unknown";
+   }
+}
+
 void
 anv_device_utrace_init(struct anv_device *device)
 {
-   u_trace_context_init(&device->trace_context, device,
+   intel_ds_device_init(&device->ds, &device->info, device->fd,
+                        device->physical->local_minor - 128,
+                        INTEL_DS_API_VULKAN);
+   u_trace_context_init(&device->ds.trace_context,
+                        &device->ds,
                         anv_utrace_create_ts_buffer,
                         anv_utrace_destroy_ts_buffer,
                         anv_utrace_record_ts,
                         anv_utrace_read_ts,
                         anv_utrace_delete_flush_data);
+
+   for (uint32_t q = 0; q < device->queue_count; q++) {
+      struct anv_queue *queue = &device->queues[q];
+
+      queue->ds =
+         intel_ds_device_add_queue(&device->ds, "%s%u",
+                                   queue_family_to_name(queue->family),
+                                   queue->index_in_family);
+   }
 }
 
 void
 anv_device_utrace_finish(struct anv_device *device)
 {
-   u_trace_context_fini(&device->trace_context);
+   intel_ds_device_fini(&device->ds);
+}
+
+enum intel_ds_stall_flag
+anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
+{
+   static const struct {
+      enum anv_pipe_bits anv;
+      enum intel_ds_stall_flag ds;
+   } anv_to_ds_flags[] = {
+      { .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT,            .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
+      { .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT,             .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
+      { .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT,             .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
+      { .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,    .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
+      { .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,       .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
+      { .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,    .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
+      { .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT,          .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
+      { .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,     .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
+      { .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
+      { .anv = ANV_PIPE_DEPTH_STALL_BIT,                  .ds = INTEL_DS_DEPTH_STALL_BIT, },
+      { .anv = ANV_PIPE_CS_STALL_BIT,                     .ds = INTEL_DS_CS_STALL_BIT, },
+      { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT,           .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
+      { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT,          .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
+   };
+
+   enum intel_ds_stall_flag ret = 0;
+   for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
+      if (anv_to_ds_flags[i].anv & bits)
+         ret |= anv_to_ds_flags[i].ds;
+   }
+
+   return ret;
 }
index 037965a..67ffefd 100644 (file)
@@ -107,7 +107,7 @@ VkResult anv_QueuePresentKHR(
       vk_semaphore_reset_temporary(&queue->device->vk, semaphore);
    }
 
-   u_trace_context_process(&queue->device->trace_context, true);
+   u_trace_context_process(&device->ds.trace_context, true);
 
    return result;
 }
index bbb90b4..7cd7c5f 100644 (file)
 #include "common/intel_l3_config.h"
 #include "blorp/blorp_genX_exec.h"
 
-#include "anv_tracepoints.h"
+#include "ds/intel_tracepoints.h"
 
 static void blorp_measure_start(struct blorp_batch *_batch,
                                 const struct blorp_params *params)
 {
    struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
-   trace_begin_blorp(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_blorp(&cmd_buffer->trace, cmd_buffer);
    anv_measure_snapshot(cmd_buffer,
                         params->snapshot_type,
                         NULL, 0);
@@ -50,13 +50,13 @@ static void blorp_measure_end(struct blorp_batch *_batch,
                               const struct blorp_params *params)
 {
    struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
-   trace_end_blorp(&cmd_buffer->trace, cmd_buffer,
-                   params->x1 - params->x0,
-                   params->y1 - params->y0,
-                   params->hiz_op,
-                   params->fast_clear_op,
-                   params->shader_type,
-                   params->shader_pipeline);
+   trace_intel_end_blorp(&cmd_buffer->trace, cmd_buffer,
+                         params->x1 - params->x0,
+                         params->y1 - params->y0,
+                         params->hiz_op,
+                         params->fast_clear_op,
+                         params->shader_type,
+                         params->shader_pipeline);
 }
 
 static void *
index 1aa79f3..fda86b3 100644 (file)
@@ -38,7 +38,7 @@
 
 #include "nir/nir_xfb_info.h"
 
-#include "anv_tracepoints.h"
+#include "ds/intel_tracepoints.h"
 
 /* We reserve :
  *    - GPR 14 for secondary command buffer returns
@@ -1763,7 +1763,7 @@ genX(BeginCommandBuffer)(
    if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
       cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
 
-   trace_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
 
@@ -1938,7 +1938,7 @@ genX(EndCommandBuffer)(
 
    emit_isp_disable(cmd_buffer);
 
-   trace_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level);
+   trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level);
 
    anv_cmd_buffer_end_batch_buffer(cmd_buffer);
 
@@ -2405,8 +2405,10 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
    else if (bits == 0)
       return;
 
-   if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS))
-      trace_stall(&cmd_buffer->trace, cmd_buffer, bits);
+   bool trace_flush =
+      (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS)) != 0;
+   if (trace_flush)
+      trace_intel_begin_stall(&cmd_buffer->trace, cmd_buffer);
 
    if ((GFX_VER >= 8 && GFX_VER <= 9) &&
        (bits & ANV_PIPE_CS_STALL_BIT) &&
@@ -2425,6 +2427,11 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
                                     cmd_buffer->device,
                                     cmd_buffer->state.current_pipeline,
                                     bits);
+
+   if (trace_flush) {
+      trace_intel_end_stall(&cmd_buffer->trace, cmd_buffer, bits,
+                            anv_pipe_flush_bit_to_ds_stall_flag, NULL);
+   }
 }
 
 static void
@@ -3963,7 +3970,7 @@ void genX(CmdDraw)(
    anv_measure_snapshot(cmd_buffer,
                         INTEL_SNAPSHOT_DRAW,
                         "draw", count);
-   trace_begin_draw(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -3993,7 +4000,7 @@ void genX(CmdDraw)(
 
    update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
 
-   trace_end_draw(&cmd_buffer->trace, cmd_buffer, count);
+   trace_intel_end_draw(&cmd_buffer->trace, cmd_buffer, count);
 }
 
 void genX(CmdDrawMultiEXT)(
@@ -4018,7 +4025,7 @@ void genX(CmdDrawMultiEXT)(
    anv_measure_snapshot(cmd_buffer,
                         INTEL_SNAPSHOT_DRAW,
                         "draw_multi", count);
-   trace_begin_draw_multi(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_multi(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4051,7 +4058,7 @@ void genX(CmdDrawMultiEXT)(
 
    update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
 
-   trace_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count);
+   trace_intel_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count);
 }
 
 void genX(CmdDrawIndexed)(
@@ -4077,7 +4084,7 @@ void genX(CmdDrawIndexed)(
                         INTEL_SNAPSHOT_DRAW,
                         "draw indexed",
                         count);
-   trace_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4105,7 +4112,7 @@ void genX(CmdDrawIndexed)(
 
    update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
 
-   trace_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count);
+   trace_intel_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count);
 }
 
 void genX(CmdDrawMultiIndexedEXT)(
@@ -4132,7 +4139,7 @@ void genX(CmdDrawMultiIndexedEXT)(
                         INTEL_SNAPSHOT_DRAW,
                         "draw indexed_multi",
                         count);
-   trace_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4220,7 +4227,7 @@ void genX(CmdDrawMultiIndexedEXT)(
 
    update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
 
-   trace_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count);
+   trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count);
 }
 
 /* Auto-Draw / Indirect Registers */
@@ -4256,7 +4263,7 @@ void genX(CmdDrawIndirectByteCountEXT)(
                         INTEL_SNAPSHOT_DRAW,
                         "draw indirect byte count",
                         instanceCount);
-   trace_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4300,8 +4307,8 @@ void genX(CmdDrawIndirectByteCountEXT)(
 
    update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
 
-   trace_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer,
-                                      instanceCount);
+   trace_intel_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer,
+                                            instanceCount);
 #endif /* GFX_VERx10 >= 75 */
 }
 
@@ -4358,7 +4365,7 @@ void genX(CmdDrawIndirect)(
    if (anv_batch_has_error(&cmd_buffer->batch))
       return;
 
-   trace_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4393,7 +4400,7 @@ void genX(CmdDrawIndirect)(
       offset += stride;
    }
 
-   trace_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
+   trace_intel_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
 }
 
 void genX(CmdDrawIndexedIndirect)(
@@ -4411,7 +4418,7 @@ void genX(CmdDrawIndexedIndirect)(
    if (anv_batch_has_error(&cmd_buffer->batch))
       return;
 
-   trace_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4447,7 +4454,7 @@ void genX(CmdDrawIndexedIndirect)(
       offset += stride;
    }
 
-   trace_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
+   trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
 }
 
 static struct mi_value
@@ -4574,7 +4581,7 @@ void genX(CmdDrawIndirectCount)(
    if (anv_batch_has_error(&cmd_buffer->batch))
       return;
 
-   trace_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4616,7 +4623,7 @@ void genX(CmdDrawIndirectCount)(
 
    mi_value_unref(&b, max);
 
-   trace_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
+   trace_intel_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
 }
 
 void genX(CmdDrawIndexedIndirectCount)(
@@ -4638,7 +4645,7 @@ void genX(CmdDrawIndexedIndirectCount)(
    if (anv_batch_has_error(&cmd_buffer->batch))
       return;
 
-   trace_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
@@ -4681,7 +4688,8 @@ void genX(CmdDrawIndexedIndirectCount)(
 
    mi_value_unref(&b, max);
 
-   trace_end_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
+   trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
+                                               cmd_buffer, maxDrawCount);
 
 }
 
@@ -5058,7 +5066,7 @@ void genX(CmdDispatchBase)(
                         prog_data->local_size[0] * prog_data->local_size[1] *
                         prog_data->local_size[2]);
 
-   trace_begin_compute(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_compute(&cmd_buffer->trace, cmd_buffer);
 
    if (prog_data->uses_num_work_groups) {
       struct anv_state state =
@@ -5084,8 +5092,8 @@ void genX(CmdDispatchBase)(
    emit_cs_walker(cmd_buffer, pipeline, false, prog_data, groupCountX,
                   groupCountY, groupCountZ);
 
-   trace_end_compute(&cmd_buffer->trace, cmd_buffer,
-                     groupCountX, groupCountY, groupCountZ);
+   trace_intel_end_compute(&cmd_buffer->trace, cmd_buffer,
+                           groupCountX, groupCountY, groupCountZ);
 }
 
 #define GPGPU_DISPATCHDIMX 0x2500
@@ -5119,7 +5127,7 @@ void genX(CmdDispatchIndirect)(
                         INTEL_SNAPSHOT_COMPUTE,
                         "compute indirect",
                         0);
-   trace_begin_compute(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_compute(&cmd_buffer->trace, cmd_buffer);
 
    if (prog_data->uses_num_work_groups) {
       cmd_buffer->state.compute.num_workgroups = addr;
@@ -5194,7 +5202,7 @@ void genX(CmdDispatchIndirect)(
 
    emit_cs_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0);
 
-   trace_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0);
+   trace_intel_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0);
 }
 
 #if GFX_VERx10 >= 125
@@ -6800,7 +6808,7 @@ void genX(CmdBeginRenderPass2)(
    cmd_buffer->state.render_area = pRenderPassBeginInfo->renderArea;
 
    anv_measure_beginrenderpass(cmd_buffer);
-   trace_begin_render_pass(&cmd_buffer->trace, cmd_buffer);
+   trace_intel_begin_render_pass(&cmd_buffer->trace, cmd_buffer);
 
    result = genX(cmd_buffer_setup_attachments)(cmd_buffer, pass,
                                                framebuffer,
@@ -6843,13 +6851,13 @@ void genX(CmdEndRenderPass2)(
 
    cmd_buffer_end_subpass(cmd_buffer);
 
-   trace_end_render_pass(&cmd_buffer->trace, cmd_buffer,
-                         cmd_buffer->state.render_area.extent.width,
-                         cmd_buffer->state.render_area.extent.height,
-                         cmd_buffer->state.pass->attachment_count,
-                         cmd_buffer->state.pass->attachment_count > 0 ?
-                         cmd_buffer->state.pass->attachments[0].samples : 0,
-                         cmd_buffer->state.pass->subpass_count);
+   trace_intel_end_render_pass(&cmd_buffer->trace, cmd_buffer,
+                               cmd_buffer->state.render_area.extent.width,
+                               cmd_buffer->state.render_area.extent.height,
+                               cmd_buffer->state.pass->attachment_count,
+                               cmd_buffer->state.pass->attachment_count > 0 ?
+                               cmd_buffer->state.pass->attachments[0].samples : 0,
+                               cmd_buffer->state.pass->subpass_count);
 
    cmd_buffer->state.hiz_enabled = false;
 
index be0308c..8e6c702 100644 (file)
@@ -33,19 +33,6 @@ anv_entrypoints = custom_target(
   depend_files : vk_entrypoints_gen_depend_files,
 )
 
-anv_tracepoints = custom_target(
-  'anv_tracepoints.[ch]',
-  input: 'anv_tracepoints.py',
-  output: ['anv_tracepoints.h', 'anv_tracepoints.c'],
-  command: [
-    prog_python, '@INPUT@',
-    '-p', join_paths(meson.source_root(), 'src/util/perf/'),
-    '--utrace-hdr', '@OUTPUT0@',
-    '--utrace-src', '@OUTPUT1@',
-  ],
-  depend_files: u_trace_py,
-)
-
 intel_icd = custom_target(
   'intel_icd',
   input : [vk_icd_gen, vk_api_xml],
@@ -78,7 +65,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
   _gfx_ver = g[0]
   libanv_per_hw_ver_libs += static_library(
     'anv_per_hw_ver@0@'.format(_gfx_ver),
-    [anv_per_hw_ver_files, g[1], anv_entrypoints[0], anv_tracepoints[0]],
+    [anv_per_hw_ver_files, g[1], anv_entrypoints[0]],
     include_directories : [
       inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
     ],
@@ -90,7 +77,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
     dependencies : [
       dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml,
       idep_vulkan_util_headers, idep_vulkan_wsi_headers,
-      idep_vulkan_runtime_headers,
+      idep_vulkan_runtime_headers, idep_intel_driver_ds_headers,
     ],
   )
 endforeach
@@ -142,6 +129,8 @@ anv_flags = [
   c_sse2_args,
 ]
 
+anv_cpp_flags = []
+
 if with_platform_x11
   anv_deps += dep_xcb_dri3
 endif
@@ -164,24 +153,27 @@ else
   libanv_files += files('anv_android_stubs.c')
 endif
 
+anv_deps += idep_intel_driver_ds_headers
+
 libanv_common = static_library(
   'anv_common',
   [
     libanv_files, anv_entrypoints, sha1_h,
-    gen_xml_pack
+    gen_xml_pack,
   ],
   include_directories : [
     inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
     inc_util,
   ],
   c_args : anv_flags,
+  cpp_args : anv_cpp_flags,
   gnu_symbol_visibility : 'hidden',
   dependencies : anv_deps,
 )
 
 libvulkan_intel = shared_library(
   'vulkan_intel',
-  [files('anv_gem.c'), anv_entrypoints[0], anv_tracepoints],
+  [files('anv_gem.c'), anv_entrypoints[0]],
   include_directories : [
     inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
   ],
@@ -193,6 +185,7 @@ libvulkan_intel = shared_library(
     dep_thread, dep_dl, dep_m, anv_deps, idep_libintel_common,
     idep_nir, idep_genxml, idep_vulkan_util, idep_vulkan_wsi,
     idep_vulkan_runtime, idep_mesautil, idep_xmlconfig,
+    idep_intel_driver_ds,
   ],
   c_args : anv_flags,
   gnu_symbol_visibility : 'hidden',
@@ -216,7 +209,7 @@ endif
 if with_tests
   libvulkan_intel_test = static_library(
     'vulkan_intel_test',
-    [files('anv_gem_stubs.c'), anv_entrypoints[0], anv_tracepoints[0]],
+    [files('anv_gem_stubs.c'), anv_entrypoints[0]],
     include_directories : [
       inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
     ],
@@ -241,13 +234,13 @@ if with_tests
       'anv_@0@'.format(t),
       executable(
         t,
-        ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_tracepoints[0]],
+        ['tests/@0@.c'.format(t), anv_entrypoints[0]],
         c_args : [ c_sse2_args ],
         link_with : libvulkan_intel_test,
         dependencies : [
           dep_libdrm, dep_thread, dep_m, dep_valgrind,
           idep_vulkan_util, idep_vulkan_wsi_headers,
-          idep_vulkan_runtime,
+          idep_vulkan_runtime, idep_intel_driver_ds,
         ],
         include_directories : [
           inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,