From 34bd5e2e2e8d9c213b051152f7a8b731151d9be5 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Mon, 26 Feb 2018 23:48:27 +0100
Subject: [PATCH] radv: Implement more efficient !waitAll fence waiting.

Reviewed-by: Dave Airlie
---
Review notes (placed below the "---" marker, so not part of the commit
message):

 * radv_WaitForFences() gains a fast path for !waitAll with more than one
   fence: when rad_info.drm_minor >= 10 and no fence uses a syncobj or is
   both unsignalled and unsubmitted, it waits through the new fences_wait
   winsys hook instead of polling radv_GetFenceStatus() in a loop.
 * radv_amdgpu_fences_wait() copies the fences into an amdgpu_cs_fence
   array and calls amdgpu_cs_wait_fences(); it returns true only when that
   call succeeds and reports a non-zero "expired".
 * NOTE(review): "timeout" is an absolute deadline at the fences_wait call
   site, so "timeout - radv_get_current_time()" presumably wraps to a very
   large value once the deadline has passed (e.g. a zero timeout) -
   confirm, and clamp to 0 if so.
 * Line structure was restored from the hunk headers; tab indentation and
   column alignment are reconstructed, so use a whitespace-tolerant apply
   if the context does not match exactly.

 src/amd/vulkan/radv_device.c                  | 36 +++++++++++++++++++++++++++
 src/amd/vulkan/radv_radeon_winsys.h           |  5 ++++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 34 +++++++++++++++++++++++++
 3 files changed, 75 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 8eadd8f..21ccfa6 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2907,6 +2907,17 @@ static uint64_t radv_get_absolute_timeout(uint64_t timeout)
 	return current_time + timeout;
 }
 
+
+static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
+{
+	for (uint32_t i = 0; i < fenceCount; ++i) {
+		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+		if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
+			return false;
+	}
+	return true;
+}
+
 VkResult radv_WaitForFences(
 	VkDevice                                    _device,
 	uint32_t                                    fenceCount,
@@ -2918,6 +2929,31 @@ VkResult radv_WaitForFences(
 	timeout = radv_get_absolute_timeout(timeout);
 
 	if (!waitAll && fenceCount > 1) {
+		/* Not doing this by default for waitAll, due to needing to allocate twice. */
+		if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
+			uint32_t wait_count = 0;
+			struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
+			if (!fences)
+				return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+			for (uint32_t i = 0; i < fenceCount; ++i) {
+				RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+
+				if (fence->signalled) {
+					free(fences);
+					return VK_SUCCESS;
+				}
+
+				fences[wait_count++] = fence->fence;
+			}
+
+			bool success = device->ws->fences_wait(device->ws, fences, wait_count,
+							       waitAll, timeout - radv_get_current_time());
+
+			free(fences);
+			return success ? VK_SUCCESS : VK_TIMEOUT;
+		}
+
 		while(radv_get_current_time() <= timeout) {
 			for (uint32_t i = 0; i < fenceCount; ++i) {
 				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 4c30669..643d76a 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -270,6 +270,11 @@ struct radeon_winsys {
 			   struct radeon_winsys_fence *fence,
 			   bool absolute,
 			   uint64_t timeout);
+	bool (*fences_wait)(struct radeon_winsys *ws,
+			    struct radeon_winsys_fence *const *fences,
+			    uint32_t fence_count,
+			    bool wait_all,
+			    uint64_t timeout);
 
 	/* old semaphores - non shareable */
 	struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 5632b1d..d2b3354 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -154,6 +154,39 @@ static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
 	return false;
 }
 
+
+static bool radv_amdgpu_fences_wait(struct radeon_winsys *_ws,
+				    struct radeon_winsys_fence *const *_fences,
+				    uint32_t fence_count,
+				    bool wait_all,
+				    uint64_t timeout)
+{
+	struct amdgpu_cs_fence *fences = malloc(sizeof(struct amdgpu_cs_fence) * fence_count);
+	int r;
+	uint32_t expired = 0, first = 0;
+
+	if (!fences)
+		return false;
+
+	for (uint32_t i = 0; i < fence_count; ++i)
+		fences[i] = ((struct radv_amdgpu_fence *)_fences[i])->fence;
+
+	/* Now use the libdrm query. */
+	r = amdgpu_cs_wait_fences(fences, fence_count, wait_all,
+				  timeout, &expired, &first);
+
+	free(fences);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_cs_wait_fences failed.\n");
+		return false;
+	}
+
+	if (expired)
+		return true;
+
+	return false;
+}
+
 static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
 {
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
@@ -1387,4 +1420,5 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
 	ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
 	ws->base.fence_wait = radv_amdgpu_fence_wait;
+	ws->base.fences_wait = radv_amdgpu_fences_wait;
 }
-- 
2.7.4