radv: Implement more efficient !waitAll fence waiting.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 26 Feb 2018 22:48:27 +0000 (23:48 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 1 Mar 2018 00:07:18 +0000 (01:07 +0100)
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c

index 8eadd8f..21ccfa6 100644 (file)
@@ -2907,6 +2907,17 @@ static uint64_t radv_get_absolute_timeout(uint64_t timeout)
        return current_time + timeout;
 }
 
+/* Returns true only if every fence in pFences has neither syncobj nor temp_syncobj set and is already signalled or submitted; stops with false at the first fence that fails. A fenceCount of 0 yields true. */
+static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
+{
+       for (uint32_t i = 0; i < fenceCount; ++i) {
+               RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+               if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
+                       return false;
+       }
+       return true;
+}
+
 VkResult radv_WaitForFences(
        VkDevice                                    _device,
        uint32_t                                    fenceCount,
@@ -2918,6 +2929,31 @@ VkResult radv_WaitForFences(
        timeout = radv_get_absolute_timeout(timeout);
 
        if (!waitAll && fenceCount > 1) {
+               /* Not doing this by default for waitAll, due to needing to allocate twice. */
+               if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
+                       uint32_t wait_count = 0;
+                       struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
+                       if (!fences)
+                               return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+                       for (uint32_t i = 0; i < fenceCount; ++i) {
+                               RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+
+                               if (fence->signalled) {
+                                       free(fences);
+                                       return VK_SUCCESS;
+                               }
+
+                               fences[wait_count++] = fence->fence;
+                       }
+
+                       bool success = device->ws->fences_wait(device->ws, fences, wait_count,
+                                                              waitAll, timeout - radv_get_current_time());
+
+                       free(fences);
+                       return success ? VK_SUCCESS : VK_TIMEOUT;
+               }
+
                while(radv_get_current_time() <= timeout) {
                        for (uint32_t i = 0; i < fenceCount; ++i) {
                                if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
index 4c30669..643d76a 100644 (file)
@@ -270,6 +270,11 @@ struct radeon_winsys {
                           struct radeon_winsys_fence *fence,
                           bool absolute,
                           uint64_t timeout);
+       bool (*fences_wait)(struct radeon_winsys *ws,
+                           struct radeon_winsys_fence *const *fences,
+                           uint32_t fence_count,
+                           bool wait_all,
+                           uint64_t timeout);
 
        /* old semaphores - non shareable */
        struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
index 5632b1d..d2b3354 100644 (file)
@@ -154,6 +154,39 @@ static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
        return false;
 }
 
+/* Waits on fence_count winsys fences with one amdgpu_cs_wait_fences() call: copies each entry's amdgpu_cs_fence into a temporary heap array, forwards wait_all and timeout unchanged, and returns true only if the call succeeds with a non-zero expired status. Allocation failure or a libdrm error returns false. */
+static bool radv_amdgpu_fences_wait(struct radeon_winsys *_ws,
+                             struct radeon_winsys_fence *const *_fences,
+                             uint32_t fence_count,
+                             bool wait_all,
+                             uint64_t timeout)
+{
+       struct amdgpu_cs_fence *fences = malloc(sizeof(struct amdgpu_cs_fence) * fence_count);
+       int r;
+       uint32_t expired = 0, first = 0;
+
+       if (!fences)
+               return false;
+
+       for (uint32_t i = 0; i < fence_count; ++i)
+               fences[i] = ((struct radv_amdgpu_fence *)_fences[i])->fence;
+
+       /* Now use the libdrm query. */
+       r = amdgpu_cs_wait_fences(fences, fence_count, wait_all,
+                                 timeout, &expired, &first);
+
+       free(fences);
+       if (r) {
+               fprintf(stderr, "amdgpu: amdgpu_cs_wait_fences failed.\n");
+               return false;
+       }
+
+       if (expired)
+               return true;
+
+       return false;
+}
+
 static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
 {
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
@@ -1387,4 +1420,5 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
        ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
        ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
        ws->base.fence_wait = radv_amdgpu_fence_wait;
+       ws->base.fences_wait = radv_amdgpu_fences_wait;
 }