radv: Add pstate locking for perfcounters.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sun, 8 May 2022 13:50:22 +0000 (15:50 +0200)
committer Marge Bot <emma+marge@anholt.net>
Sat, 9 Jul 2022 12:29:05 +0000 (12:29 +0000)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16879>

src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_sqtt.c

index 745bccb..0758dd0 100644 (file)
@@ -3297,6 +3297,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
    device->instance = physical_device->instance;
    device->physical_device = physical_device;
    simple_mtx_init(&device->trace_mtx, mtx_plain);
+   simple_mtx_init(&device->pstate_mtx, mtx_plain);
 
    device->ws = physical_device->ws;
    vk_device_set_drm_fd(&device->vk, device->ws->get_fd(device->ws));
@@ -3564,6 +3565,7 @@ fail:
          device->ws->ctx_destroy(device->hw_ctx[i]);
    }
 
+   simple_mtx_destroy(&device->pstate_mtx);
    simple_mtx_destroy(&device->trace_mtx);
    mtx_destroy(&device->overallocation_mutex);
 
@@ -3605,6 +3607,7 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    }
 
    mtx_destroy(&device->overallocation_mutex);
+   simple_mtx_destroy(&device->pstate_mtx);
    simple_mtx_destroy(&device->trace_mtx);
 
    radv_device_finish_meta(device);
@@ -6884,3 +6887,65 @@ radv_GetPhysicalDeviceFragmentShadingRatesKHR(
 
    return vk_outarray_status(&out);
 }
+
+/**
+ * Program the pstate of every queue's HW context.
+ *
+ * \param device  Device whose queues are updated.
+ * \param enable  true selects RADEON_CTX_PSTATE_PEAK, false selects
+ *                RADEON_CTX_PSTATE_NONE.
+ *
+ * \return false as soon as one ctx_set_pstate() call reports an error
+ *         (contexts that were already switched are left as they are),
+ *         true otherwise.
+ */
+static bool
+radv_thread_trace_set_pstate(struct radv_device *device, bool enable)
+{
+   struct radeon_winsys *ws = device->ws;
+   enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
+
+   /* Without has_stable_pstate there is nothing to program. */
+   if (!device->physical_device->rad_info.has_stable_pstate)
+      return true;
+
+   for (unsigned family = 0; family < RADV_MAX_QUEUE_FAMILIES; family++) {
+      const unsigned count = device->queue_count[family];
+
+      for (unsigned idx = 0; idx < count; idx++) {
+         struct radv_queue *queue = &device->queues[family][idx];
+
+         if (ws->ctx_set_pstate(queue->hw_ctx, pstate) < 0)
+            return false;
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Take a reference on the profiling pstate.
+ *
+ * The first holder (pstate_cnt == 0) programs the queues through
+ * radv_thread_trace_set_pstate(device, true); later holders only bump the
+ * counter. Every successful call is counted, so each one can be paired with
+ * radv_device_release_performance_counters() without the pstate being
+ * dropped while another holder still relies on it.
+ *
+ * The counter is protected by device->pstate_mtx.
+ *
+ * \return true on success, false if the pstate could not be set (the
+ *         counter is left unchanged in that case).
+ */
+bool
+radv_device_acquire_performance_counters(struct radv_device *device)
+{
+   bool result = true;
+   simple_mtx_lock(&device->pstate_mtx);
+
+   /* Only the 0 -> 1 transition has to touch the HW contexts. */
+   if (device->pstate_cnt == 0)
+      result = radv_thread_trace_set_pstate(device, true);
+
+   /* Count every successful acquire, not just the first one; otherwise the
+    * first release drops the pstate for all remaining holders and the next
+    * release wraps the unsigned counter.
+    */
+   if (result)
+      ++device->pstate_cnt;
+
+   simple_mtx_unlock(&device->pstate_mtx);
+   return result;
+}
+
+/**
+ * Drop one reference on the profiling pstate.
+ *
+ * When the counter reaches zero the queues are switched back through
+ * radv_thread_trace_set_pstate(device, false); the result of that call is
+ * not checked. The counter is protected by device->pstate_mtx.
+ */
+void
+radv_device_release_performance_counters(struct radv_device *device)
+{
+   simple_mtx_lock(&device->pstate_mtx);
+
+   /* An unmatched release must not wrap the unsigned counter: with a wrapped
+    * value the acquire path would keep reporting success without setting the
+    * pstate again.
+    */
+   if (device->pstate_cnt > 0 && --device->pstate_cnt == 0)
+      radv_thread_trace_set_pstate(device, false);
+
+   simple_mtx_unlock(&device->pstate_mtx);
+}
+
+/**
+ * Entry point for acquiring the profiling lock: forwards to
+ * radv_device_acquire_performance_counters(). pInfo is not read.
+ *
+ * \return VK_SUCCESS when the acquire succeeded, VK_ERROR_UNKNOWN otherwise.
+ */
+VKAPI_ATTR VkResult VKAPI_CALL
+radv_AcquireProfilingLockKHR(VkDevice _device, const VkAcquireProfilingLockInfoKHR *pInfo)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+
+   if (!radv_device_acquire_performance_counters(device))
+      return VK_ERROR_UNKNOWN;
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Entry point for releasing the profiling lock: forwards to
+ * radv_device_release_performance_counters().
+ */
+VKAPI_ATTR void VKAPI_CALL
+radv_ReleaseProfilingLockKHR(VkDevice _device)
+{
+   RADV_FROM_HANDLE(radv_device, dev, _device);
+
+   radv_device_release_performance_counters(dev);
+}
index 9ef009e..1c3a51e 100644 (file)
@@ -908,8 +908,14 @@ struct radv_device {
 
    /* Whether shaders created through application entrypoints are considered internal. */
    bool app_shaders_internal;
+
+   simple_mtx_t pstate_mtx;
+   unsigned pstate_cnt;
 };
 
+/* Acquire/release the pstate used while performance counters are in use.
+ * Both take device->pstate_mtx internally. Acquire returns false when the
+ * pstate could not be set.
+ */
+bool radv_device_acquire_performance_counters(struct radv_device *device);
+void radv_device_release_performance_counters(struct radv_device *device);
+
 struct radv_device_memory {
    struct vk_object_base base;
    struct radeon_winsys_bo *bo;
index ecd5b36..f2bc8d2 100644 (file)
@@ -448,26 +448,6 @@ radv_thread_trace_finish_bo(struct radv_device *device)
    }
 }
 
-static int
-radv_thread_trace_init_pstate(struct radv_device *device)
-{
-   struct radeon_winsys *ws = device->ws;
-
-   if (device->physical_device->rad_info.has_stable_pstate) {
-      for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
-         for (unsigned q = 0; q < device->queue_count[i]; q++) {
-            struct radv_queue *queue = &device->queues[i][q];
-
-            /* Set the current pstate to peak which is required for profiling. */
-            if (ws->ctx_set_pstate(queue->hw_ctx, RADEON_CTX_PSTATE_PEAK) < 0)
-               return false;
-         }
-      }
-   }
-
-   return true;
-}
-
 bool
 radv_thread_trace_init(struct radv_device *device)
 {
@@ -485,7 +465,7 @@ radv_thread_trace_init(struct radv_device *device)
    if (!radv_thread_trace_init_bo(device))
       return false;
 
-   if (!radv_thread_trace_init_pstate(device))
+   if (!radv_device_acquire_performance_counters(device))
       return false;
 
    list_inithead(&thread_trace_data->rgp_pso_correlation.record);