anv: Use vk_query_pool
author: Faith Ekstrand <faith.ekstrand@collabora.com>
Mon, 31 Jul 2023 16:30:37 +0000 (11:30 -0500)
committer: Marge Bot <emma+marge@anholt.net>
Tue, 1 Aug 2023 19:17:05 +0000 (19:17 +0000)
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24409>

src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_query.c

index e9a4c04..b2727e8 100644 (file)
@@ -91,6 +91,7 @@
 #include "vk_sync.h"
 #include "vk_sync_timeline.h"
 #include "vk_util.h"
+#include "vk_query_pool.h"
 #include "vk_queue.h"
 #include "vk_log.h"
 #include "vk_ycbcr_conversion.h"
@@ -4546,14 +4547,11 @@ struct anv_sampler {
 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
 
 struct anv_query_pool {
-   struct vk_object_base                        base;
+   struct vk_query_pool                         vk;
 
-   VkQueryType                                  type;
-   VkQueryPipelineStatisticFlags                pipeline_statistics;
    /** Stride between slots, in bytes */
    uint32_t                                     stride;
    /** Number of slots in this query pool */
-   uint32_t                                     slots;
    struct anv_bo *                              bo;
 
    /** Location for the KHR_performance_query small batch updating
@@ -4775,7 +4773,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
                                VK_OBJECT_TYPE_PIPELINE)
 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
-VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
+VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool,
                                VK_OBJECT_TYPE_QUERY_POOL)
 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler,
                                VK_OBJECT_TYPE_SAMPLER)
index e0270ee..f9b1d8e 100644 (file)
@@ -195,20 +195,18 @@ VkResult genX(CreateQueryPool)(
       assert(!"Invalid query type");
    }
 
-   if (!vk_object_multialloc(&device->vk, &ma, pAllocator,
-                             VK_OBJECT_TYPE_QUERY_POOL))
+   if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   pool->type = pCreateInfo->queryType;
-   pool->pipeline_statistics = pipeline_statistics;
+   vk_query_pool_init(&device->vk, &pool->vk, pCreateInfo);
    pool->stride = uint64s_per_slot * sizeof(uint64_t);
-   pool->slots = pCreateInfo->queryCount;
 
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
       pool->data_offset = data_offset;
       pool->snapshot_size = (pool->stride - data_offset) / 2;
    }
-   else if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       pool->pass_size = pool->stride / n_passes;
       pool->data_offset = data_offset;
       pool->snapshot_size = (pool->pass_size - data_offset) / 2;
@@ -226,12 +224,12 @@ VkResult genX(CreateQueryPool)(
                               pool->pass_query);
    }
 
-   uint64_t size = pool->slots * (uint64_t)pool->stride;
+   uint64_t size = pool->vk.query_count * (uint64_t)pool->stride;
 
    /* For KHR_performance_query we need some space in the buffer for a small
     * batch updating ANV_PERF_QUERY_OFFSET_REG.
     */
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       pool->khr_perf_preamble_stride = 32;
       pool->khr_perf_preambles_offset = size;
       size += (uint64_t)pool->n_passes * pool->khr_perf_preamble_stride;
@@ -245,7 +243,7 @@ VkResult genX(CreateQueryPool)(
    if (result != VK_SUCCESS)
       goto fail;
 
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t p = 0; p < pool->n_passes; p++) {
          struct mi_builder b;
          struct anv_batch batch = {
@@ -422,7 +420,7 @@ query_slot(struct anv_query_pool *pool, uint32_t query)
 static bool
 query_is_available(struct anv_query_pool *pool, uint32_t query)
 {
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t p = 0; p < pool->n_passes; p++) {
          volatile uint64_t *slot =
             pool->bo->map + khr_perf_query_availability_offset(pool, query, p);
@@ -441,7 +439,7 @@ wait_for_available(struct anv_device *device,
 {
    /* By default we leave a 2s timeout before declaring the device lost. */
    uint64_t rel_timeout = 2 * NSEC_PER_SEC;
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       /* With performance queries, there is an additional 500us reconfiguration
        * time in i915.
        */
@@ -479,19 +477,19 @@ VkResult genX(GetQueryPoolResults)(
 
    assert(
 #if GFX_VERx10 >= 125
-   pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
-   pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
-   pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
-   pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR ||
+   pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
+   pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
+   pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
+   pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR ||
 #endif
-   pool->type == VK_QUERY_TYPE_OCCLUSION ||
-   pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
-   pool->type == VK_QUERY_TYPE_TIMESTAMP ||
-   pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
-   pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-   pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL ||
-   pool->type == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT ||
-   pool->type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR);
+   pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
+   pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
+   pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP ||
+   pool->vk.query_type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
+   pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
+   pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL ||
+   pool->vk.query_type == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT ||
+   pool->vk.query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR);
 
    if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
@@ -532,7 +530,7 @@ VkResult genX(GetQueryPoolResults)(
       bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
 
       uint32_t idx = 0;
-      switch (pool->type) {
+      switch (pool->vk.query_type) {
       case VK_QUERY_TYPE_OCCLUSION:
       case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
          uint64_t *slot = query_slot(pool, firstQuery + i);
@@ -553,7 +551,7 @@ VkResult genX(GetQueryPoolResults)(
 
       case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
          uint64_t *slot = query_slot(pool, firstQuery + i);
-         uint32_t statistics = pool->pipeline_statistics;
+         uint32_t statistics = pool->vk.pipeline_statistics;
          while (statistics) {
             UNUSED uint32_t stat = u_bit_scan(&statistics);
             if (write_results) {
@@ -562,7 +560,7 @@ VkResult genX(GetQueryPoolResults)(
             }
             idx++;
          }
-         assert(idx == util_bitcount(pool->pipeline_statistics));
+         assert(idx == util_bitcount(pool->vk.pipeline_statistics));
          break;
       }
 
@@ -710,7 +708,7 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
                   struct mi_builder *b, struct anv_query_pool *pool,
                   uint32_t first_index, uint32_t num_queries)
 {
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_TIMESTAMP:
       /* These queries are written with a PIPE_CONTROL so clear them using the
@@ -801,7 +799,7 @@ void genX(CmdResetQueryPool)(
 
    trace_intel_begin_query_clear_cs(&cmd_buffer->trace);
 
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
 #if GFX_VERx10 >= 125
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
@@ -887,7 +885,7 @@ void genX(ResetQueryPool)(
    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
 
    for (uint32_t i = 0; i < queryCount; i++) {
-      if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+      if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
          for (uint32_t p = 0; p < pool->n_passes; p++) {
             uint64_t *pass_slot = pool->bo->map +
                khr_perf_query_availability_offset(pool, firstQuery + i, p);
@@ -1023,7 +1021,7 @@ void genX(CmdBeginQueryIndexedEXT)(
    struct mi_builder b;
    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
 
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
       cmd_buffer->state.gfx.n_occlusion_queries++;
       emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 8));
@@ -1045,7 +1043,7 @@ void genX(CmdBeginQueryIndexedEXT)(
                                     ANV_PIPE_CS_STALL_BIT |
                                     ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
 
-      uint32_t statistics = pool->pipeline_statistics;
+      uint32_t statistics = pool->vk.pipeline_statistics;
       uint32_t offset = 8;
       while (statistics) {
          uint32_t stat = u_bit_scan(&statistics);
@@ -1213,7 +1211,7 @@ void genX(CmdEndQueryIndexedEXT)(
    struct mi_builder b;
    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
 
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
       emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16));
       emit_query_pc_availability(cmd_buffer, query_addr, true);
@@ -1241,7 +1239,7 @@ void genX(CmdEndQueryIndexedEXT)(
                                     ANV_PIPE_CS_STALL_BIT |
                                     ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
 
-      uint32_t statistics = pool->pipeline_statistics;
+      uint32_t statistics = pool->vk.pipeline_statistics;
       uint32_t offset = 16;
       while (statistics) {
          uint32_t stat = u_bit_scan(&statistics);
@@ -1388,7 +1386,7 @@ void genX(CmdWriteTimestamp2)(
    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
    struct anv_address query_addr = anv_query_address(pool, query);
 
-   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
+   assert(pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP);
 
    emit_query_clear_flush(cmd_buffer, pool,
                           "CmdWriteTimestamp flush query clears");
@@ -1541,8 +1539,8 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
     *     previous uses of vkCmdResetQueryPool in the same queue, without any
     *     additional synchronization."
     */
-   if (pool->type == VK_QUERY_TYPE_OCCLUSION ||
-       pool->type == VK_QUERY_TYPE_TIMESTAMP)
+   if (pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
+       pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP)
       needed_flushes |= ANV_PIPE_CS_STALL_BIT;
 
    if (needed_flushes) {
@@ -1570,7 +1568,7 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
       }
 
       uint32_t idx = 0;
-      switch (pool->type) {
+      switch (pool->vk.query_type) {
       case VK_QUERY_TYPE_OCCLUSION:
       case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
          result = compute_query_result(&b, anv_address_add(query_addr, 8));
@@ -1589,14 +1587,14 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
          break;
 
       case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
-         uint32_t statistics = pool->pipeline_statistics;
+         uint32_t statistics = pool->vk.pipeline_statistics;
          while (statistics) {
             UNUSED uint32_t stat = u_bit_scan(&statistics);
             result = compute_query_result(&b, anv_address_add(query_addr,
                                                               idx * 16 + 8));
             gpu_write_query_result(&b, dest_addr, flags, idx++, result);
          }
-         assert(idx == util_bitcount(pool->pipeline_statistics));
+         assert(idx == util_bitcount(pool->vk.pipeline_statistics));
          break;
       }
 
@@ -1680,10 +1678,10 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
       /* Some queries are done with shaders, so we need to have them flush
        * high level caches writes. The L3 should be shared across the GPU.
        */
-      if (pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
-          pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
-          pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
-          pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR) {
+      if (pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR) {
          needed_flushes |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
       }
       /* And we need to stall for previous CS writes to land or the flushes to
@@ -1703,8 +1701,8 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
     *     previous uses of vkCmdResetQueryPool in the same queue, without any
     *     additional synchronization."
     */
-   if (pool->type == VK_QUERY_TYPE_OCCLUSION ||
-       pool->type == VK_QUERY_TYPE_TIMESTAMP)
+   if (pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
+       pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP)
       needed_flushes |= ANV_PIPE_CS_STALL_BIT;
 
    if (needed_flushes) {
@@ -1736,7 +1734,7 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
 
    uint32_t num_items = 1;
    uint32_t data_offset = 8 /* behind availability */;
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
@@ -1749,7 +1747,7 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
       break;
 
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      num_items = util_bitcount(pool->pipeline_statistics);
+      num_items = util_bitcount(pool->vk.pipeline_statistics);
       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
       break;