From e4485bc062db9b2e403340fadab6520775f0bbc3 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Mon, 31 Jul 2023 11:30:37 -0500
Subject: [PATCH] anv: Use vk_query_pool

Reviewed-by: Lionel Landwerlin
Part-of:
---
 src/intel/vulkan/anv_private.h |  9 +++------
 src/intel/vulkan/genX_query.c  | 94 +++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 54 deletions(-)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index e9a4c04..b2727e8 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -91,6 +91,7 @@
 #include "vk_sync.h"
 #include "vk_sync_timeline.h"
 #include "vk_util.h"
+#include "vk_query_pool.h"
 #include "vk_queue.h"
 #include "vk_log.h"
 #include "vk_ycbcr_conversion.h"
@@ -4546,14 +4547,10 @@ struct anv_sampler {
 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
 
 struct anv_query_pool {
-   struct vk_object_base base;
+   struct vk_query_pool vk;
 
-   VkQueryType type;
-   VkQueryPipelineStatisticFlags pipeline_statistics;
    /** Stride between slots, in bytes */
    uint32_t stride;
-   /** Number of slots in this query pool */
-   uint32_t slots;
 
    struct anv_bo * bo;
    /** Location for the KHR_performance_query small batch updating
@@ -4775,7 +4772,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
                                VK_OBJECT_TYPE_PIPELINE)
 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
-VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
+VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool,
                                VK_OBJECT_TYPE_QUERY_POOL)
 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler,
                                VK_OBJECT_TYPE_SAMPLER)
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index e0270ee..f9b1d8e 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -195,20 +195,18 @@ VkResult genX(CreateQueryPool)(
       assert(!"Invalid query type");
    }
 
-   if (!vk_object_multialloc(&device->vk, &ma, pAllocator,
-                             VK_OBJECT_TYPE_QUERY_POOL))
+   if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   pool->type = pCreateInfo->queryType;
-   pool->pipeline_statistics = pipeline_statistics;
+   vk_query_pool_init(&device->vk, &pool->vk, pCreateInfo);
    pool->stride = uint64s_per_slot * sizeof(uint64_t);
-   pool->slots = pCreateInfo->queryCount;
 
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
       pool->data_offset = data_offset;
       pool->snapshot_size = (pool->stride - data_offset) / 2;
    }
-   else if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       pool->pass_size = pool->stride / n_passes;
       pool->data_offset = data_offset;
       pool->snapshot_size = (pool->pass_size - data_offset) / 2;
@@ -226,12 +224,12 @@ VkResult genX(CreateQueryPool)(
                                     pool->pass_query);
    }
 
-   uint64_t size = pool->slots * (uint64_t)pool->stride;
+   uint64_t size = pool->vk.query_count * (uint64_t)pool->stride;
 
    /* For KHR_performance_query we need some space in the buffer for a small
    * batch updating ANV_PERF_QUERY_OFFSET_REG.
    */
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       pool->khr_perf_preamble_stride = 32;
       pool->khr_perf_preambles_offset = size;
       size += (uint64_t)pool->n_passes * pool->khr_perf_preamble_stride;
@@ -245,7 +243,7 @@
    if (result != VK_SUCCESS)
       goto fail;
 
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t p = 0; p < pool->n_passes; p++) {
          struct mi_builder b;
         struct anv_batch batch = {
@@ -422,7 +420,7 @@ query_slot(struct anv_query_pool *pool, uint32_t query)
 static bool
 query_is_available(struct anv_query_pool *pool, uint32_t query)
 {
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
       for (uint32_t p = 0; p < pool->n_passes; p++) {
          volatile uint64_t *slot =
            pool->bo->map + khr_perf_query_availability_offset(pool, query, p);
@@ -441,7 +439,7 @@ wait_for_available(struct anv_device *device,
 {
    /* By default we leave a 2s timeout before declaring the device lost. */
    uint64_t rel_timeout = 2 * NSEC_PER_SEC;
-   if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
      /* With performance queries, there is an additional 500us reconfiguration
       * time in i915.
       */
@@ -479,19 +477,19 @@ VkResult genX(GetQueryPoolResults)(
 
    assert(
 #if GFX_VERx10 >= 125
-          pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
-          pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
-          pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
-          pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR ||
 #endif
-          pool->type == VK_QUERY_TYPE_OCCLUSION ||
-          pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
-          pool->type == VK_QUERY_TYPE_TIMESTAMP ||
-          pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
-          pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
-          pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL ||
-          pool->type == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT ||
-          pool->type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR);
+          pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
+          pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
+          pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP ||
+          pool->vk.query_type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
+          pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
+          pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL ||
+          pool->vk.query_type == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT ||
+          pool->vk.query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR);
 
    if (vk_device_is_lost(&device->vk))
       return VK_ERROR_DEVICE_LOST;
@@ -532,7 +530,7 @@ VkResult genX(GetQueryPoolResults)(
       bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
 
       uint32_t idx = 0;
-      switch (pool->type) {
+      switch (pool->vk.query_type) {
       case VK_QUERY_TYPE_OCCLUSION:
       case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
          uint64_t *slot = query_slot(pool, firstQuery + i);
@@ -553,7 +551,7 @@
       case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
          uint64_t *slot = query_slot(pool, firstQuery + i);
-         uint32_t statistics = pool->pipeline_statistics;
+         uint32_t statistics = pool->vk.pipeline_statistics;
          while (statistics) {
             UNUSED uint32_t stat = u_bit_scan(&statistics);
             if (write_results) {
@@ -562,7 +560,7 @@
             }
             idx++;
          }
-         assert(idx == util_bitcount(pool->pipeline_statistics));
+         assert(idx == util_bitcount(pool->vk.pipeline_statistics));
          break;
       }
 
@@ -710,7 +708,7 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
                   struct mi_builder *b, struct anv_query_pool *pool,
                   uint32_t first_index, uint32_t num_queries)
 {
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_TIMESTAMP:
       /* These queries are written with a PIPE_CONTROL so clear them using the
@@ -801,7 +799,7 @@ void genX(CmdResetQueryPool)(
 
    trace_intel_begin_query_clear_cs(&cmd_buffer->trace);
 
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
 #if GFX_VERx10 >= 125
    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
@@ -887,7 +885,7 @@ void genX(ResetQueryPool)(
    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
 
    for (uint32_t i = 0; i < queryCount; i++) {
-      if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
+      if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
         for (uint32_t p = 0; p < pool->n_passes; p++) {
            uint64_t *pass_slot = pool->bo->map +
               khr_perf_query_availability_offset(pool, firstQuery + i, p);
@@ -1023,7 +1021,7 @@ void genX(CmdBeginQueryIndexedEXT)(
    struct mi_builder b;
    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
 
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
       cmd_buffer->state.gfx.n_occlusion_queries++;
       emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 8));
@@ -1045,7 +1043,7 @@ void genX(CmdBeginQueryIndexedEXT)(
                                 ANV_PIPE_CS_STALL_BIT |
                                 ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
 
-      uint32_t statistics = pool->pipeline_statistics;
+      uint32_t statistics = pool->vk.pipeline_statistics;
       uint32_t offset = 8;
       while (statistics) {
          uint32_t stat = u_bit_scan(&statistics);
@@ -1213,7 +1211,7 @@ void genX(CmdEndQueryIndexedEXT)(
    struct mi_builder b;
    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
 
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
       emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16));
       emit_query_pc_availability(cmd_buffer, query_addr, true);
@@ -1241,7 +1239,7 @@ void genX(CmdEndQueryIndexedEXT)(
                                 ANV_PIPE_CS_STALL_BIT |
                                 ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
 
-      uint32_t statistics = pool->pipeline_statistics;
+      uint32_t statistics = pool->vk.pipeline_statistics;
       uint32_t offset = 16;
       while (statistics) {
          uint32_t stat = u_bit_scan(&statistics);
@@ -1388,7 +1386,7 @@ void genX(CmdWriteTimestamp2)(
    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
    struct anv_address query_addr = anv_query_address(pool, query);
 
-   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
+   assert(pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP);
 
    emit_query_clear_flush(cmd_buffer, pool,
                           "CmdWriteTimestamp flush query clears");
@@ -1541,8 +1539,8 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
     * previous uses of vkCmdResetQueryPool in the same queue, without any
     * additional synchronization."
     */
-   if (pool->type == VK_QUERY_TYPE_OCCLUSION ||
-       pool->type == VK_QUERY_TYPE_TIMESTAMP)
+   if (pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
+       pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP)
       needed_flushes |= ANV_PIPE_CS_STALL_BIT;
 
    if (needed_flushes) {
@@ -1570,7 +1568,7 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
    }
 
    uint32_t idx = 0;
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
       result = compute_query_result(&b, anv_address_add(query_addr, 8));
@@ -1589,14 +1587,14 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
       break;
 
    case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
-      uint32_t statistics = pool->pipeline_statistics;
+      uint32_t statistics = pool->vk.pipeline_statistics;
       while (statistics) {
          UNUSED uint32_t stat = u_bit_scan(&statistics);
          result = compute_query_result(&b, anv_address_add(query_addr,
                                                            idx * 16 + 8));
         gpu_write_query_result(&b, dest_addr, flags, idx++, result);
       }
-      assert(idx == util_bitcount(pool->pipeline_statistics));
+      assert(idx == util_bitcount(pool->vk.pipeline_statistics));
       break;
    }
 
@@ -1680,10 +1678,10 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
    /* Some queries are done with shaders, so we need to have them flush
    * high level caches writes. The L3 should be shared across the GPU.
    */
-   if (pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
-       pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
-       pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
-       pool->type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR) {
+   if (pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
+       pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
+       pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
+       pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR) {
       needed_flushes |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
    }
    /* And we need to stall for previous CS writes to land or the flushes to
@@ -1703,8 +1701,8 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
     * previous uses of vkCmdResetQueryPool in the same queue, without any
     * additional synchronization."
     */
-   if (pool->type == VK_QUERY_TYPE_OCCLUSION ||
-       pool->type == VK_QUERY_TYPE_TIMESTAMP)
+   if (pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
+       pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP)
       needed_flushes |= ANV_PIPE_CS_STALL_BIT;
 
    if (needed_flushes) {
@@ -1736,7 +1734,7 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
    uint32_t num_items = 1;
    uint32_t data_offset = 8 /* behind availability */;
 
-   switch (pool->type) {
+   switch (pool->vk.query_type) {
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
@@ -1749,7 +1747,7 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
       break;
 
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      num_items = util_bitcount(pool->pipeline_statistics);
+      num_items = util_bitcount(pool->vk.pipeline_statistics);
       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
       break;
 
--
2.7.4
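
For readers less familiar with Mesa's common Vulkan runtime, the sketch below
illustrates the driver pattern this patch adopts: embed struct vk_query_pool as
the first member and let vk_query_pool_init() populate the fields the driver
previously mirrored by hand (vk.query_type, vk.query_count and, for
pipeline-statistics pools, vk.pipeline_statistics). Those names come from the
diff above; the "demo" driver, the plain vk_zalloc2() allocation (anv itself
uses vk_multialloc_zalloc2() because it co-allocates extra data in the same
block), and the vk_query_pool_finish() call in the destroy path are
illustrative assumptions, not part of this change.

#include "vk_alloc.h"
#include "vk_device.h"
#include "vk_query_pool.h"

/* Hypothetical driver pool: embed the common vk_query_pool as the first
 * member, exactly as anv_query_pool now embeds "struct vk_query_pool vk".
 */
struct demo_query_pool {
   struct vk_query_pool vk;   /* replaces "struct vk_object_base base" */
   uint32_t stride;           /* driver-specific per-slot stride, in bytes */
};

static VkResult
demo_create_query_pool(struct vk_device *device,
                       const VkQueryPoolCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       struct demo_query_pool **pool_out)
{
   struct demo_query_pool *pool =
      vk_zalloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pool == NULL)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Initializes the embedded vk_object_base and records queryType,
    * queryCount and (for pipeline-statistics pools) pipelineStatistics,
    * so the driver no longer has to mirror those fields by hand.
    */
   vk_query_pool_init(device, &pool->vk, pCreateInfo);

   /* Driver-specific state; 16 bytes = availability + one 64-bit value. */
   pool->stride = 2 * sizeof(uint64_t);

   *pool_out = pool;
   return VK_SUCCESS;
}

static void
demo_destroy_query_pool(struct vk_device *device,
                        struct demo_query_pool *pool,
                        const VkAllocationCallbacks *pAllocator)
{
   /* Assumed counterpart to vk_query_pool_init(); not shown in this patch. */
   vk_query_pool_finish(&pool->vk);
   vk_free2(&device->alloc, pAllocator, pool);
}

Once the common base is in place, code can dispatch on pool->vk.query_type
instead of a driver-private type field, which is what the bulk of the
genX_query.c churn above is doing.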