From f7d8e64f51121b366a1f5c026bebbdab37fb0863 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Sun, 8 May 2022 15:50:22 +0200
Subject: [PATCH] radv: Add pstate locking for perfcounters.

Reviewed-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/radv_device.c  | 91 +++++++++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h |  7 ++++
 src/amd/vulkan/radv_sqtt.c    | 22 +----------
 3 files changed, 99 insertions(+), 21 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 745bccb..0758dd0 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3297,6 +3297,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
    device->instance = physical_device->instance;
    device->physical_device = physical_device;
    simple_mtx_init(&device->trace_mtx, mtx_plain);
+   simple_mtx_init(&device->pstate_mtx, mtx_plain);
 
    device->ws = physical_device->ws;
    vk_device_set_drm_fd(&device->vk, device->ws->get_fd(device->ws));
@@ -3564,6 +3565,7 @@ fail:
          device->ws->ctx_destroy(device->hw_ctx[i]);
    }
 
+   simple_mtx_destroy(&device->pstate_mtx);
    simple_mtx_destroy(&device->trace_mtx);
    mtx_destroy(&device->overallocation_mutex);
 
@@ -3605,6 +3607,7 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    }
 
    mtx_destroy(&device->overallocation_mutex);
+   simple_mtx_destroy(&device->pstate_mtx);
    simple_mtx_destroy(&device->trace_mtx);
 
    radv_device_finish_meta(device);
@@ -6884,3 +6887,91 @@ radv_GetPhysicalDeviceFragmentShadingRatesKHR(
 
    return vk_outarray_status(&out);
 }
+
+/**
+ * Switch every queue's hardware context to RADEON_CTX_PSTATE_PEAK (enable) or
+ * back to RADEON_CTX_PSTATE_NONE (!enable).
+ *
+ * Does nothing and reports success when the device has no stable pstate
+ * support. Returns false as soon as one context rejects the change; contexts
+ * already switched at that point are not rolled back.
+ */
+static bool
+radv_thread_trace_set_pstate(struct radv_device *device, bool enable)
+{
+   struct radeon_winsys *ws = device->ws;
+   enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
+
+   if (device->physical_device->rad_info.has_stable_pstate) {
+      for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+         for (unsigned q = 0; q < device->queue_count[i]; q++) {
+            struct radv_queue *queue = &device->queues[i][q];
+
+            if (ws->ctx_set_pstate(queue->hw_ctx, pstate) < 0)
+               return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Take one reference on the profiling pstate.
+ *
+ * The first reference switches the queues to RADEON_CTX_PSTATE_PEAK; later
+ * ones only bump the count. Returns false, without taking a reference, when
+ * the pstate cannot be set. Each successful call must be paired with
+ * radv_device_release_performance_counters().
+ */
+bool
+radv_device_acquire_performance_counters(struct radv_device *device)
+{
+   bool result = true;
+   simple_mtx_lock(&device->pstate_mtx);
+
+   if (device->pstate_cnt == 0)
+      result = radv_thread_trace_set_pstate(device, true);
+
+   /* Count every successful acquire, not only the first one, so that the
+    * pstate is kept until the last user releases it.
+    */
+   if (result)
+      ++device->pstate_cnt;
+
+   simple_mtx_unlock(&device->pstate_mtx);
+   return result;
+}
+
+/**
+ * Drop one reference on the profiling pstate; the last release switches the
+ * queues back to RADEON_CTX_PSTATE_NONE (a failure to do so is ignored).
+ */
+void
+radv_device_release_performance_counters(struct radv_device *device)
+{
+   simple_mtx_lock(&device->pstate_mtx);
+
+   /* An unbalanced release must not wrap the unsigned counter around. */
+   if (device->pstate_cnt > 0 && --device->pstate_cnt == 0)
+      radv_thread_trace_set_pstate(device, false);
+
+   simple_mtx_unlock(&device->pstate_mtx);
+}
+
+/* Entry point for vkAcquireProfilingLockKHR; pInfo is not used. */
+VKAPI_ATTR VkResult VKAPI_CALL
+radv_AcquireProfilingLockKHR(VkDevice _device, const VkAcquireProfilingLockInfoKHR *pInfo)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+   bool result = radv_device_acquire_performance_counters(device);
+   return result ? VK_SUCCESS : VK_ERROR_UNKNOWN;
+}
+
+/* Entry point for vkReleaseProfilingLockKHR; drops one pstate reference. */
+VKAPI_ATTR void VKAPI_CALL
+radv_ReleaseProfilingLockKHR(VkDevice _device)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+   radv_device_release_performance_counters(device);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 9ef009e..1c3a51e 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -908,8 +908,15 @@ struct radv_device {
 
    /* Whether shaders created through application entrypoints are considered internal. */
    bool app_shaders_internal;
+
+   /* Reference count of peak-pstate users, guarded by pstate_mtx. */
+   simple_mtx_t pstate_mtx;
+   unsigned pstate_cnt;
 };
 
+bool radv_device_acquire_performance_counters(struct radv_device *device);
+void radv_device_release_performance_counters(struct radv_device *device);
+
 struct radv_device_memory {
    struct vk_object_base base;
    struct radeon_winsys_bo *bo;
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index ecd5b36..f2bc8d2 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -448,26 +448,6 @@ radv_thread_trace_finish_bo(struct radv_device *device)
    }
 }
 
-static int
-radv_thread_trace_init_pstate(struct radv_device *device)
-{
-   struct radeon_winsys *ws = device->ws;
-
-   if (device->physical_device->rad_info.has_stable_pstate) {
-      for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
-         for (unsigned q = 0; q < device->queue_count[i]; q++) {
-            struct radv_queue *queue = &device->queues[i][q];
-
-            /* Set the current pstate to peak which is required for profiling. */
-            if (ws->ctx_set_pstate(queue->hw_ctx, RADEON_CTX_PSTATE_PEAK) < 0)
-               return false;
-         }
-      }
-   }
-
-   return true;
-}
-
 bool
 radv_thread_trace_init(struct radv_device *device)
 {
@@ -485,7 +465,7 @@ radv_thread_trace_init(struct radv_device *device)
    if (!radv_thread_trace_init_bo(device))
       return false;
 
-   if (!radv_thread_trace_init_pstate(device))
+   if (!radv_device_acquire_performance_counters(device))
       return false;
 
    list_inithead(&thread_trace_data->rgp_pso_correlation.record);
-- 
2.7.4