radv: Add helper BO for perf counters.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 9 May 2022 23:57:21 +0000 (01:57 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Sat, 9 Jul 2022 12:29:05 +0000 (12:29 +0000)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16879>

src/amd/vulkan/radv_constants.h
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h

index 1953a5d..38af1d4 100644 (file)
 #define RADV_SHADER_ALLOC_NUM_FREE_LISTS                                                           \
    (RADV_SHADER_ALLOC_MAX_SIZE_CLASS - RADV_SHADER_ALLOC_MIN_SIZE_CLASS + 1)
 
+#define PERF_CTR_MAX_PASSES      512 /* number of uint64_t pass slots the perf counter BO is sized for */
+#define PERF_CTR_BO_PASS_OFFSET  16  /* byte offset of the pass slots; BO size = this + sizeof(uint64_t) * PERF_CTR_MAX_PASSES */
+#define PERF_CTR_BO_LOCK_OFFSET  0   /* presumably the byte offset of the profiling cmdbuffer lock in the BO -- confirm against users */
+#define PERF_CTR_BO_FENCE_OFFSET 8   /* presumably the byte offset of the end-query fence in the BO -- confirm against users */
+
 #endif /* RADV_CONSTANTS_H */
index 0758dd0..fcd0c56 100644 (file)
@@ -3179,6 +3179,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
    bool global_bo_list = false;
    bool image_2d_view_of_3d = false;
    bool primitives_generated_query = false;
+   bool use_perf_counters = false;
 
    /* Check enabled features */
    if (pCreateInfo->pEnabledFeatures) {
@@ -3259,6 +3260,12 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
             primitives_generated_query = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
+         const VkPhysicalDevicePerformanceQueryFeaturesKHR *features = (const void *)ext;
+         if (features->performanceCounterQueryPools)
+            use_perf_counters = true;
+         break;
+      }
       default:
          break;
       }
@@ -3533,9 +3540,21 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
               1 << util_logbase2(device->force_aniso));
    }
 
+   if (use_perf_counters) {
+      size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES;
+      result =
+         device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT,
+                                   RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+                                   RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo);
+      if (result != VK_SUCCESS)
+         goto fail_cache;
+   }
+
    *pDevice = radv_device_to_handle(device);
    return VK_SUCCESS;
 
+fail_cache:
+   radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
 fail_meta:
    radv_device_finish_meta(device);
 fail:
@@ -3546,6 +3565,8 @@ fail:
    radv_trap_handler_finish(device);
    radv_finish_trace(device);
 
+   if (device->perf_counter_bo)
+      device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
    if (device->gfx_init)
       device->ws->buffer_destroy(device->ws, device->gfx_init);
 
@@ -3582,6 +3603,9 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    if (!device)
       return;
 
+   if (device->perf_counter_bo)
+      device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
+
    if (device->gfx_init)
       device->ws->buffer_destroy(device->ws, device->gfx_init);
 
index bfc1058..b778b58 100644 (file)
@@ -911,6 +911,13 @@ struct radv_device {
 
    simple_mtx_t pstate_mtx;
    unsigned pstate_cnt;
+
+   /* BO to contain some performance counter helpers:
+    * - A lock for profiling cmdbuffers.
+    * - A temporary fence for the end query synchronization.
+    * - The pass to use for profiling (as an array of bools).
+    */
+   struct radeon_winsys_bo *perf_counter_bo;
 };
 
 bool radv_device_acquire_performance_counters(struct radv_device *device);