From 5c7e60362cbf26e331b5621fac643813558b6493 Mon Sep 17 00:00:00 2001
From: Juston Li
Date: Mon, 6 Feb 2023 13:44:39 -0800
Subject: [PATCH] venus: enable timeline semaphore feedback

At vkQueueSubmit time, for each batch with timeline semaphores to
signal, append cmd buffers with feedback cmds that update the counter
values in their respective feedback slots.

Since multiple signals on the same semaphore can be pending at the same
time across batches/vkQueueSubmits, src slots and commands are
allocated on demand. These src slots can be reused after they have
signaled (i.e. the current semaphore counter is greater than or equal
to the src value) and are cleaned up on vkDestroySemaphore.

Signed-off-by: Juston Li
Part-of:
---
 src/virtio/vulkan/vn_feedback.c |   6 +-
 src/virtio/vulkan/vn_queue.c    | 612 ++++++++++++++++++++++++++++++++++++---
 src/virtio/vulkan/vn_queue.h    |  32 +++
 3 files changed, 607 insertions(+), 43 deletions(-)

diff --git a/src/virtio/vulkan/vn_feedback.c b/src/virtio/vulkan/vn_feedback.c
index e7275ec..0fc5f84 100644
--- a/src/virtio/vulkan/vn_feedback.c
+++ b/src/virtio/vulkan/vn_feedback.c
@@ -418,7 +418,7 @@ vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
    /* slot size is 8 bytes for timeline semaphore and 4 bytes fence.
     * src slot is non-null for timeline semaphore.
     */
-   VkDeviceSize buf_size = src_slot ? 8 : 4;
+   const VkDeviceSize buf_size = src_slot ? 8 : 4;
 
    static const VkCommandBufferBeginInfo begin_info = {
       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
@@ -503,7 +503,7 @@
 VkResult
 vn_feedback_cmd_alloc(VkDevice dev_handle,
                       struct vn_feedback_cmd_pool *pool,
-                      struct vn_feedback_slot *slot,
+                      struct vn_feedback_slot *dst_slot,
                       struct vn_feedback_slot *src_slot,
                       VkCommandBuffer *out_cmd_handle)
 {
@@ -522,7 +522,7 @@ vn_feedback_cmd_alloc(VkDevice dev_handle,
    if (result != VK_SUCCESS)
       goto out_unlock;
 
-   result = vn_feedback_cmd_record(cmd_handle, slot, src_slot);
+   result = vn_feedback_cmd_record(cmd_handle, dst_slot, src_slot);
    if (result != VK_SUCCESS) {
       vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
       goto out_unlock;
diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c
index 55da4fc..3d8f58e 100644
--- a/src/virtio/vulkan/vn_queue.c
+++ b/src/virtio/vulkan/vn_queue.c
@@ -59,21 +59,33 @@ struct vn_queue_submission {
    bool synchronous;
 
    bool has_feedback_fence;
+   bool has_feedback_semaphore;
 
    const struct vn_device_memory *wsi_mem;
-
+   uint32_t sem_cmd_buffer_count;
+
+   /* Temporary storage allocation for submission
+    * A single alloc for storage is performed and the offsets inside
+    * storage are set as below:
+    * batches
+    *  - copy of SubmitInfos
+    *  - an extra SubmitInfo for appending fence feedback
+    * cmds
+    *  - copy of cmd buffers for any batch with sem feedback, plus
+    *    additional cmd buffers for each signal semaphore that uses
+    *    feedback
+    *  - an extra cmd buffer info for appending fence feedback
+    *    when using SubmitInfo2
+    */
    struct {
       void *storage;
 
-      /* ptr offset to cmd buffer info needed by SubmitInfo2
-       * for fence feedback */
-      VkCommandBufferSubmitInfo *fence_feedback_cmd_info;
-
-      /* ptr offsets to batches in storage */
       union {
          void *batches;
         VkSubmitInfo *submit_batches;
         VkSubmitInfo2 *submit_batches2;
      };
+
+      void *cmds;
   } temp;
};

@@ -133,6 +145,53 @@ vn_get_signal_semaphore(struct vn_queue_submission *submit,
             .semaphore;
 }
 
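+/* Helpers that dispatch on submit->batch_type so the feedback logic below
+ * can handle VkSubmitInfo and VkSubmitInfo2 batches uniformly.
+ */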
+static inline uint32_t
+vn_get_cmd_buffer_count(struct vn_queue_submission *submit,
+                        uint32_t batch_index)
+{
+   assert((submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO) ||
+          (submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO_2));
+
+   return submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO
+             ? submit->submit_batches[batch_index].commandBufferCount
+             : submit->submit_batches2[batch_index].commandBufferInfoCount;
+}
+
+static inline const void *
+vn_get_cmd_buffer_ptr(struct vn_queue_submission *submit,
+                      uint32_t batch_index)
+{
+   assert((submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO) ||
+          (submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO_2));
+
+   return submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO
+             ? (const void *)submit->submit_batches[batch_index]
+                  .pCommandBuffers
+             : (const void *)submit->submit_batches2[batch_index]
+                  .pCommandBufferInfos;
+}
+
+static uint64_t
+vn_get_signal_semaphore_counter(struct vn_queue_submission *submit,
+                                uint32_t batch_index,
+                                uint32_t semaphore_index)
+{
+   switch (submit->batch_type) {
+   case VK_STRUCTURE_TYPE_SUBMIT_INFO: {
+      const struct VkTimelineSemaphoreSubmitInfo *timeline_semaphore_info =
+         vk_find_struct_const(submit->submit_batches[batch_index].pNext,
+                              TIMELINE_SEMAPHORE_SUBMIT_INFO);
+      return timeline_semaphore_info->pSignalSemaphoreValues[semaphore_index];
+   }
+   case VK_STRUCTURE_TYPE_SUBMIT_INFO_2:
+      return submit->submit_batches2[batch_index]
+         .pSignalSemaphoreInfos[semaphore_index]
+         .value;
+   default:
+      unreachable("unexpected batch type");
+   }
+}
+
 static VkResult
 vn_queue_submission_fix_batch_semaphores(struct vn_queue_submission *submit,
                                          uint32_t batch_index)
@@ -166,14 +225,27 @@ vn_queue_submission_fix_batch_semaphores(struct vn_queue_submission *submit,
          dev->instance, vn_device_to_handle(dev), &res_info);
    }
 
+   bool batch_has_sem_feedback = false;
    for (uint32_t i = 0; i < signal_count; i++) {
       struct vn_semaphore *sem = vn_semaphore_from_handle(
          vn_get_signal_semaphore(submit, batch_index, i));
 
       /* see vn_queue_submission_prepare */
       submit->synchronous |= sem->is_external;
+
+      if (sem->feedback.slot) {
+         batch_has_sem_feedback = true;
+         submit->sem_cmd_buffer_count++;
+      }
+   }
+
+   if (batch_has_sem_feedback) {
+      submit->sem_cmd_buffer_count +=
+         vn_get_cmd_buffer_count(submit, batch_index);
    }
 
+   submit->has_feedback_semaphore |= batch_has_sem_feedback;
+
    return VK_SUCCESS;
 }
 
@@ -226,9 +298,9 @@ vn_queue_submission_alloc_storage(struct vn_queue_submission *submit)
    size_t batch_size = 0;
    size_t cmd_size = 0;
    size_t alloc_size = 0;
-   size_t batch_offset = 0;
+   size_t cmd_offset = 0;
 
-   if (!submit->has_feedback_fence)
+   if (!submit->has_feedback_fence && !submit->has_feedback_semaphore)
       return VK_SUCCESS;
 
    switch (submit->batch_type) {
@@ -239,23 +311,32 @@ vn_queue_submission_alloc_storage(struct vn_queue_submission *submit)
    case VK_STRUCTURE_TYPE_SUBMIT_INFO_2:
       batch_size = sizeof(VkSubmitInfo2);
       cmd_size = sizeof(VkCommandBufferSubmitInfo);
+      break;
+   default:
+      unreachable("unexpected batch type");
+   }
+
+   /* space for copied batches */
+   alloc_size = batch_size * submit->batch_count;
+   cmd_offset = alloc_size;
+
+   if (submit->has_feedback_fence) {
+      /* add space for an additional batch for fence feedback
+       * and move cmd offset
+       */
+      alloc_size += batch_size;
+      cmd_offset = alloc_size;
 
       /* SubmitInfo2 needs a cmd buffer info struct for the fence
        * feedback cmd
        */
-      if (submit->has_feedback_fence)
+      if (submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO_2)
          alloc_size += cmd_size;
-
-      break;
-   default:
-      unreachable("unexpected batch type");
    }
 
-   /* offset/size for batches */
-   batch_offset = alloc_size;
-   alloc_size += batch_size * submit->batch_count;
-
-   /* add space for an additional batch for fence feedback */
-   if (submit->has_feedback_fence)
-      alloc_size += batch_size;
+
+   /* space for copied cmds and sem feedback cmds */
+   if (submit->has_feedback_semaphore)
+      alloc_size += submit->sem_cmd_buffer_count * cmd_size;
 
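+   /* Illustrative layout of the single allocation:
+    *   batches: [copied batches][fence feedback batch, if any]
+    *   cmds:    [per-batch copied cmds + sem feedback cmds]
+    *            [fence feedback cmd info, SubmitInfo2 only]
+    */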
    submit->temp.storage = vk_alloc(alloc, alloc_size, VN_DEFAULT_ALIGN,
                                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 
    if (!submit->temp.storage)
       return VK_ERROR_OUT_OF_HOST_MEMORY;
 
-   submit->temp.fence_feedback_cmd_info = submit->temp.storage;
-   submit->temp.batches = submit->temp.storage + batch_offset;
+   submit->temp.batches = submit->temp.storage;
+   submit->temp.cmds = submit->temp.storage + cmd_offset;
+
+   return VK_SUCCESS;
+}
+
+struct vn_feedback_src {
+   struct vn_feedback_slot *src_slot;
+   VkCommandBuffer *commands;
+
+   struct list_head head;
+};
+
+static VkResult
+vn_timeline_semaphore_feedback_src_init(struct vn_device *dev,
+                                        struct vn_feedback_slot *slot,
+                                        struct vn_feedback_src *feedback_src,
+                                        const VkAllocationCallbacks *alloc)
+{
+   VkResult result;
+   VkDevice dev_handle = vn_device_to_handle(dev);
+
+   feedback_src->src_slot = vn_feedback_pool_alloc(
+      &dev->feedback_pool, VN_FEEDBACK_TYPE_TIMELINE_SEMAPHORE);
+
+   if (!feedback_src->src_slot)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   feedback_src->commands = vk_zalloc(
+      alloc, sizeof(*feedback_src->commands) * dev->queue_family_count,
+      VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+   if (!feedback_src->commands) {
+      vn_feedback_pool_free(&dev->feedback_pool, feedback_src->src_slot);
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   }
+
+   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
+      result = vn_feedback_cmd_alloc(dev_handle, &dev->cmd_pools[i], slot,
+                                     feedback_src->src_slot,
+                                     &feedback_src->commands[i]);
+      if (result != VK_SUCCESS) {
+         for (uint32_t j = 0; j < i; j++) {
+            vn_feedback_cmd_free(dev_handle, &dev->cmd_pools[j],
+                                 feedback_src->commands[j]);
+         }
+         vk_free(alloc, feedback_src->commands);
+         vn_feedback_pool_free(&dev->feedback_pool, feedback_src->src_slot);
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+vn_set_sem_feedback_cmd(struct vn_queue *queue,
+                        struct vn_semaphore *sem,
+                        uint64_t counter,
+                        VkCommandBuffer *cmd_handle)
+{
+   VkResult result;
+   struct vn_device *dev = queue->device;
+   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
+   struct vn_feedback_src *free_feedback_src = NULL;
+
+   assert(sem->feedback.slot);
+
+   simple_mtx_lock(&sem->feedback.src_lists_mtx);
+   if (!list_is_empty(&sem->feedback.free_src_list)) {
+      free_feedback_src = list_first_entry(&sem->feedback.free_src_list,
+                                           struct vn_feedback_src, head);
+      list_move_to(&free_feedback_src->head, &sem->feedback.pending_src_list);
+   }
+   simple_mtx_unlock(&sem->feedback.src_lists_mtx);
+
+   if (!free_feedback_src) {
+      /* allocate a new src slot if none are free */
+      free_feedback_src =
+         vk_zalloc(alloc, sizeof(*free_feedback_src), VN_DEFAULT_ALIGN,
+                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+      if (!free_feedback_src)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+      result = vn_timeline_semaphore_feedback_src_init(
+         dev, sem->feedback.slot, free_feedback_src, alloc);
+      if (result != VK_SUCCESS) {
+         vk_free(alloc, free_feedback_src);
+         return result;
+      }
+
+      simple_mtx_lock(&sem->feedback.src_lists_mtx);
+      list_add(&free_feedback_src->head, &sem->feedback.pending_src_list);
+      simple_mtx_unlock(&sem->feedback.src_lists_mtx);
+   }
+
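+   /* Stamp the value into the src slot; the feedback cmd recorded against
+    * this src slot copies it into the semaphore's feedback slot when the
+    * batch executes.
+    */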
+   vn_feedback_set_counter(free_feedback_src->src_slot, counter);
+
+   for (uint32_t i = 0; i < queue->device->queue_family_count; i++) {
+      if (queue->device->queue_families[i] == queue->family) {
+         *cmd_handle = free_feedback_src->commands[i];
+         return VK_SUCCESS;
+      }
+   }
+
+   unreachable("bad feedback sem");
+}
+
+struct vn_feedback_cmds {
+   union {
+      void *cmds;
+      VkCommandBuffer *cmd_buffers;
+      VkCommandBufferSubmitInfo *cmd_buffer_infos;
+   };
+};
+
+static inline VkCommandBuffer *
+vn_get_cmd_handle(struct vn_queue_submission *submit,
+                  struct vn_feedback_cmds *feedback_cmds,
+                  uint32_t cmd_index)
+{
+   assert((submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO) ||
+          (submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO_2));
+
+   return submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO
+             ? &feedback_cmds->cmd_buffers[cmd_index]
+             : &feedback_cmds->cmd_buffer_infos[cmd_index].commandBuffer;
+}
+
+static VkResult
+vn_queue_submission_add_semaphore_feedback(
+   struct vn_queue_submission *submit,
+   uint32_t batch_index,
+   uint32_t cmd_buffer_count,
+   uint32_t sem_feedback_count,
+   struct vn_feedback_cmds *feedback_cmds)
+{
+   struct vn_queue *queue = vn_queue_from_handle(submit->queue_handle);
+   uint32_t signal_semaphore_count =
+      vn_get_signal_semaphore_count(submit, batch_index);
+   VkResult result;
+
+   /* Update the SubmitInfo to use our copy of cmd buffers with the sem
+    * feedback cmds appended and update the cmd buffer count.
+    * SubmitInfo2 also needs its cmd buffer info structs initialized.
+    */
+   switch (submit->batch_type) {
+   case VK_STRUCTURE_TYPE_SUBMIT_INFO: {
+      VkSubmitInfo *submit_info = &submit->temp.submit_batches[batch_index];
+
+      submit_info->pCommandBuffers = feedback_cmds->cmd_buffers;
+      submit_info->commandBufferCount = cmd_buffer_count + sem_feedback_count;
+      break;
+   }
+   case VK_STRUCTURE_TYPE_SUBMIT_INFO_2: {
+      VkSubmitInfo2 *submit_info2 =
+         &submit->temp.submit_batches2[batch_index];
+
+      for (uint32_t i = cmd_buffer_count;
+           i < cmd_buffer_count + sem_feedback_count; i++) {
+         VkCommandBufferSubmitInfo *cmd_buffer_info =
+            &feedback_cmds->cmd_buffer_infos[i];
+
+         cmd_buffer_info->sType =
+            VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
+         cmd_buffer_info->pNext = NULL;
+         cmd_buffer_info->deviceMask = 0;
+      }
+
+      submit_info2->pCommandBufferInfos = feedback_cmds->cmd_buffer_infos;
+      submit_info2->commandBufferInfoCount =
+         cmd_buffer_count + sem_feedback_count;
+      break;
+   }
+   default:
+      unreachable("unexpected batch type");
+   }
+
+   /* Fill the cmd slots we appended to our copy of the cmd buffers with
+    * feedback cmds that write each signal semaphore's counter value.
+    */
+   uint32_t cmd_index = cmd_buffer_count;
+   for (uint32_t i = 0; i < signal_semaphore_count; i++) {
+      struct vn_semaphore *sem = vn_semaphore_from_handle(
+         vn_get_signal_semaphore(submit, batch_index, i));
+
+      if (sem->feedback.slot) {
+         VkCommandBuffer *cmd_handle =
+            vn_get_cmd_handle(submit, feedback_cmds, cmd_index);
+
+         uint64_t counter =
+            vn_get_signal_semaphore_counter(submit, batch_index, i);
+
+         result = vn_set_sem_feedback_cmd(queue, sem, counter, cmd_handle);
+         if (result != VK_SUCCESS)
+            return result;
+
+         cmd_index++;
+      }
+   }
 
    return VK_SUCCESS;
 }
 
@@ -283,7 +563,9 @@ vn_get_fence_feedback_cmd(struct vn_queue *queue, struct vn_fence *fence)
 }
 
 static void
-vn_queue_submission_add_fence_feedback(struct vn_queue_submission *submit)
+vn_queue_submission_add_fence_feedback(
+   struct vn_queue_submission *submit,
+   VkCommandBufferSubmitInfo *fence_feedback_cmd)
 {
    struct vn_queue *queue = vn_queue_from_handle(submit->queue_handle);
    struct vn_fence *fence = vn_fence_from_handle(submit->fence_handle);
@@ -307,10 +589,7 @@ vn_queue_submission_add_fence_feedback(struct vn_queue_submission *submit)
       break;
    }
    case VK_STRUCTURE_TYPE_SUBMIT_INFO_2: {
-      VkCommandBufferSubmitInfo *cmd_buffer_info =
-         submit->temp.fence_feedback_cmd_info;
-
-      *cmd_buffer_info = (VkCommandBufferSubmitInfo){
+      *fence_feedback_cmd = (VkCommandBufferSubmitInfo){
          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
          .commandBuffer = *cmd_handle,
       };
@@ -321,7 +600,7 @@ vn_queue_submission_add_fence_feedback(struct vn_queue_submission *submit)
       *submit_info2 = (VkSubmitInfo2){
          .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
          .commandBufferInfoCount = 1,
-         .pCommandBufferInfos = cmd_buffer_info,
+         .pCommandBufferInfos = fence_feedback_cmd,
       };
       break;
    }
@@ -335,17 +614,21 @@
 static VkResult
 vn_queue_submission_setup_batches(struct vn_queue_submission *submit)
 {
+   VkResult result;
    size_t batch_size = 0;
+   size_t cmd_size = 0;
 
-   if (!submit->has_feedback_fence)
+   if (!submit->has_feedback_fence && !submit->has_feedback_semaphore)
       return VK_SUCCESS;
 
    switch (submit->batch_type) {
    case VK_STRUCTURE_TYPE_SUBMIT_INFO:
       batch_size = sizeof(VkSubmitInfo);
+      cmd_size = sizeof(VkCommandBuffer);
       break;
    case VK_STRUCTURE_TYPE_SUBMIT_INFO_2:
       batch_size = sizeof(VkSubmitInfo2);
+      cmd_size = sizeof(VkCommandBufferSubmitInfo);
       break;
    default:
       unreachable("unexpected batch type");
@@ -361,8 +644,55 @@
               batch_size * submit->batch_count);
    }
 
-   if (submit->has_feedback_fence)
-      vn_queue_submission_add_fence_feedback(submit);
+   /* For any batch with semaphore feedback, copy the original
+    * cmd buffer handles and append the feedback cmds.
+    */
+   uint32_t cmd_offset = 0;
+   for (uint32_t batch_index = 0; batch_index < submit->batch_count;
+        batch_index++) {
+      uint32_t cmd_buffer_count =
+         vn_get_cmd_buffer_count(submit, batch_index);
+      uint32_t signal_count =
+         vn_get_signal_semaphore_count(submit, batch_index);
+
+      uint32_t sem_feedback_count = 0;
+      for (uint32_t i = 0; i < signal_count; i++) {
+         struct vn_semaphore *sem = vn_semaphore_from_handle(
+            vn_get_signal_semaphore(submit, batch_index, i));
+
+         if (sem->feedback.slot)
+            sem_feedback_count++;
+      }
+
+      if (sem_feedback_count) {
+         struct vn_feedback_cmds feedback_cmds = {
+            .cmds = submit->temp.cmds + cmd_offset,
+         };
+
+         size_t cmd_buffer_size = cmd_buffer_count * cmd_size;
+         /* copy only needed for non-empty batches */
+         if (cmd_buffer_size) {
+            memcpy(feedback_cmds.cmds,
+                   vn_get_cmd_buffer_ptr(submit, batch_index),
+                   cmd_buffer_size);
+         }
+
+         result = vn_queue_submission_add_semaphore_feedback(
+            submit, batch_index, cmd_buffer_count, sem_feedback_count,
+            &feedback_cmds);
+         if (result != VK_SUCCESS)
+            return result;
+
+         /* set offset to the next batch's cmd buffers */
+         cmd_offset += cmd_buffer_size + (sem_feedback_count * cmd_size);
+      }
+   }
+
+   if (submit->has_feedback_fence) {
+      VkCommandBufferSubmitInfo *fence_feedback_cmd =
+         submit->temp.cmds + cmd_offset;
+      vn_queue_submission_add_fence_feedback(submit, fence_feedback_cmd);
+   }
 
    submit->submit_batches = submit->temp.submit_batches;
 
@@ -370,12 +700,67 @@
 static void
+vn_queue_sem_recycle_src_feedback(VkDevice dev_handle, VkSemaphore sem_handle)
+{
+   struct vn_semaphore *sem = vn_semaphore_from_handle(sem_handle);
+
+   if (!sem->feedback.slot)
+      return;
+
+   uint64_t curr_counter = 0;
+   vn_GetSemaphoreCounterValue(dev_handle, sem_handle, &curr_counter);
+
+   /* search the pending src list for already-signaled values */
+   simple_mtx_lock(&sem->feedback.src_lists_mtx);
+   list_for_each_entry_safe(struct vn_feedback_src, feedback_src,
+                            &sem->feedback.pending_src_list, head) {
+      if (curr_counter >= vn_feedback_get_counter(feedback_src->src_slot)) {
+         list_move_to(&feedback_src->head, &sem->feedback.free_src_list);
+      }
+   }
+   simple_mtx_unlock(&sem->feedback.src_lists_mtx);
+}
+
+static void
+vn_queue_recycle_src_feedback(struct vn_queue_submission *submit)
+{
+   struct vn_queue *queue = vn_queue_from_handle(submit->queue_handle);
+   struct vn_device *dev = queue->device;
+   VkDevice dev_handle = vn_device_to_handle(dev);
+
+   for (uint32_t batch_index = 0; batch_index < submit->batch_count;
+        batch_index++) {
+      uint32_t wait_count = vn_get_wait_semaphore_count(submit, batch_index);
+      uint32_t signal_count =
+         vn_get_signal_semaphore_count(submit, batch_index);
+
+      for (uint32_t i = 0; i < wait_count; i++) {
+         VkSemaphore sem_handle =
+            vn_get_wait_semaphore(submit, batch_index, i);
+         vn_queue_sem_recycle_src_feedback(dev_handle, sem_handle);
+      }
+
+      for (uint32_t i = 0; i < signal_count; i++) {
+         VkSemaphore sem_handle =
+            vn_get_signal_semaphore(submit, batch_index, i);
+         vn_queue_sem_recycle_src_feedback(dev_handle, sem_handle);
+      }
+   }
+}
+
+static void
 vn_queue_submission_cleanup(struct vn_queue_submission *submit)
 {
    struct vn_queue *queue = vn_queue_from_handle(submit->queue_handle);
    const VkAllocationCallbacks *alloc = &queue->device->base.base.alloc;
 
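+   /* Return already-signaled src slots of this submission's wait and
+    * signal semaphores to their free lists for reuse.
+    */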
-   if (submit->has_feedback_fence)
+   /* TODO clean up pending src feedbacks on failure? */
+   if (submit->has_feedback_semaphore)
+      vn_queue_recycle_src_feedback(submit);
+
+   if (submit->has_feedback_fence || submit->has_feedback_semaphore)
       vk_free(alloc, submit->temp.storage);
 }
 
@@ -816,8 +1201,8 @@ vn_GetFenceStatus(VkDevice device, VkFence _fence)
    if (result == VK_SUCCESS) {
       /* When fence feedback slot gets signaled, the real fence
        * signal operation follows after but the signaling isr can be
-       * deferred or preempted. To avoid theoretical racing, we let
-       * the renderer wait for the fence. This also helps resolve
+       * deferred or preempted. To avoid racing, we let the
+       * renderer wait for the fence. This also helps resolve
        * synchronization validation errors, because the layer no
        * longer sees any fence status checks and falsely believes the
        * caller does not sync.
@@ -1094,6 +1479,84 @@ vn_semaphore_signal_wsi(struct vn_device *dev, struct vn_semaphore *sem)
    sem->payload = temp;
 }
 
+static VkResult
+vn_timeline_semaphore_feedback_init(struct vn_device *dev,
+                                    struct vn_semaphore *sem,
+                                    uint64_t initial_value,
+                                    const VkAllocationCallbacks *alloc)
+{
+   struct vn_feedback_slot *slot;
+
+   assert(sem->type == VK_SEMAPHORE_TYPE_TIMELINE);
+
+   if (sem->is_external)
+      return VK_SUCCESS;
+
+   if (VN_PERF(NO_TIMELINE_SEM_FEEDBACK))
+      return VK_SUCCESS;
+
+   slot = vn_feedback_pool_alloc(&dev->feedback_pool,
+                                 VN_FEEDBACK_TYPE_TIMELINE_SEMAPHORE);
+   if (!slot)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   list_inithead(&sem->feedback.pending_src_list);
+   list_inithead(&sem->feedback.free_src_list);
+
+   vn_feedback_set_counter(slot, initial_value);
+
+   simple_mtx_init(&sem->feedback.src_lists_mtx, mtx_plain);
+   simple_mtx_init(&sem->feedback.async_wait_mtx, mtx_plain);
+
+   sem->feedback.signaled_counter = initial_value;
+   sem->feedback.slot = slot;
+
+   return VK_SUCCESS;
+}
+
+static void
+vn_timeline_semaphore_feedback_free(struct vn_device *dev,
+                                    struct vn_feedback_src *feedback_src)
+{
+   VkDevice dev_handle = vn_device_to_handle(dev);
+   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
+
+   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
+      vn_feedback_cmd_free(dev_handle, &dev->cmd_pools[i],
+                           feedback_src->commands[i]);
+   }
+   vk_free(alloc, feedback_src->commands);
+
+   vn_feedback_pool_free(&dev->feedback_pool, feedback_src->src_slot);
+   /* feedback_src was allocated lazily at submission time using the
+    * device-level allocator, not the one passed to vkCreateSemaphore
+    */
+   vk_free(alloc, feedback_src);
+}
+
+static void
+vn_timeline_semaphore_feedback_fini(struct vn_device *dev,
+                                    struct vn_semaphore *sem)
+{
+   if (!sem->feedback.slot)
+      return;
+
+   list_for_each_entry_safe(struct vn_feedback_src, feedback_src,
+                            &sem->feedback.free_src_list, head) {
+      vn_timeline_semaphore_feedback_free(dev, feedback_src);
+   }
+
+   list_for_each_entry_safe(struct vn_feedback_src, feedback_src,
+                            &sem->feedback.pending_src_list, head) {
+      vn_timeline_semaphore_feedback_free(dev, feedback_src);
+   }
+
+   simple_mtx_destroy(&sem->feedback.src_lists_mtx);
+   simple_mtx_destroy(&sem->feedback.async_wait_mtx);
+
+   vn_feedback_pool_free(&dev->feedback_pool, sem->feedback.slot);
+}
+
 VkResult
 vn_CreateSemaphore(VkDevice device,
                    const VkSemaphoreCreateInfo *pCreateInfo,
@@ -1127,10 +1590,14 @@ vn_CreateSemaphore(VkDevice device,
    sem->is_external = export_info && export_info->handleTypes;
 
    VkResult result = vn_semaphore_init_payloads(dev, sem, initial_val, alloc);
-   if (result != VK_SUCCESS) {
-      vn_object_base_fini(&sem->base);
-      vk_free(alloc, sem);
-      return vn_error(dev->instance, result);
+   if (result != VK_SUCCESS)
+      goto out_object_base_fini;
+
+   if (sem->type == VK_SEMAPHORE_TYPE_TIMELINE) {
+      result =
+         vn_timeline_semaphore_feedback_init(dev, sem, initial_val, alloc);
+      if (result != VK_SUCCESS)
+         goto out_payloads_fini;
    }
 
    VkSemaphore sem_handle = vn_semaphore_to_handle(sem);
@@ -1140,6 +1607,15 @@
    *pSemaphore = sem_handle;
 
    return VK_SUCCESS;
+
+out_payloads_fini:
+   vn_sync_payload_release(dev, &sem->permanent);
+   vn_sync_payload_release(dev, &sem->temporary);
+
+out_object_base_fini:
+   vn_object_base_fini(&sem->base);
+   vk_free(alloc, sem);
+   return vn_error(dev->instance, result);
 }
 
 void
@@ -1158,6 +1634,9 @@ vn_DestroySemaphore(VkDevice device,
 
    vn_async_vkDestroySemaphore(dev->instance, device, semaphore, NULL);
 
+   if (sem->type == VK_SEMAPHORE_TYPE_TIMELINE)
+      vn_timeline_semaphore_feedback_fini(dev, sem);
+
    vn_sync_payload_release(dev, &sem->permanent);
    vn_sync_payload_release(dev, &sem->temporary);
 
@@ -1170,14 +1649,53 @@ vn_GetSemaphoreCounterValue(VkDevice device,
                             VkSemaphore semaphore,
                             uint64_t *pValue)
 {
-   VN_TRACE_FUNC();
    struct vn_device *dev = vn_device_from_handle(device);
    struct vn_semaphore *sem = vn_semaphore_from_handle(semaphore);
    ASSERTED struct vn_sync_payload *payload = sem->payload;
 
    assert(payload->type == VN_SYNC_TYPE_DEVICE_ONLY);
-   return vn_call_vkGetSemaphoreCounterValue(dev->instance, device, semaphore,
-                                             pValue);
+
+   if (sem->feedback.slot) {
+      simple_mtx_lock(&sem->feedback.async_wait_mtx);
+
+      *pValue = vn_feedback_get_counter(sem->feedback.slot);
+
+      if (sem->feedback.signaled_counter < *pValue) {
+         /* When the timeline semaphore feedback slot gets signaled, the real
+          * semaphore signal operation follows after but the signaling isr can
+          * be deferred or preempted. To avoid racing, we let the renderer
+          * wait for the semaphore by sending an asynchronous wait call for
+          * the feedback value.
+          * We also cache the counter value so the async call is only sent
+          * once per counter value, preventing redundant async wait calls.
+          * The cached counter value requires a lock to ensure multiple
+          * threads querying for the same value are guaranteed to encode
+          * after the async wait call.
+          *
+          * This also helps resolve synchronization validation errors, because
+          * the layer no longer sees any semaphore status checks and falsely
+          * believes the caller does not sync.
+          */
+         VkSemaphoreWaitInfo wait_info = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+            .pNext = NULL,
+            .flags = 0,
+            .semaphoreCount = 1,
+            .pSemaphores = &semaphore,
+            .pValues = pValue,
+         };
+
+         vn_async_vkWaitSemaphores(dev->instance, device, &wait_info,
+                                   UINT64_MAX);
+         sem->feedback.signaled_counter = *pValue;
+      }
+      simple_mtx_unlock(&sem->feedback.async_wait_mtx);
+
+      return VK_SUCCESS;
+   } else {
+      return vn_call_vkGetSemaphoreCounterValue(dev->instance, device,
+                                                semaphore, pValue);
+   }
 }
 
 VkResult
@@ -1185,6 +1703,8 @@ vn_SignalSemaphore(VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo)
 {
    VN_TRACE_FUNC();
    struct vn_device *dev = vn_device_from_handle(device);
+   struct vn_semaphore *sem =
+      vn_semaphore_from_handle(pSignalInfo->semaphore);
 
    /* TODO if the semaphore is shared-by-ref, this needs to be synchronous */
    if (false)
@@ -1192,6 +1712,18 @@
    else
       vn_async_vkSignalSemaphore(dev->instance, device, pSignalInfo);
 
+   if (sem->feedback.slot) {
+      simple_mtx_lock(&sem->feedback.async_wait_mtx);
+
+      vn_feedback_set_counter(sem->feedback.slot, pSignalInfo->value);
+      /* Update the cached counter too: since we are the signaler, we are
+       * aligned with the renderer.
+       */
+      sem->feedback.signaled_counter = pSignalInfo->value;
+
+      simple_mtx_unlock(&sem->feedback.async_wait_mtx);
+   }
+
    return VK_SUCCESS;
 }
 
diff --git a/src/virtio/vulkan/vn_queue.h b/src/virtio/vulkan/vn_queue.h
index e34ab98..cdfc46b 100644
--- a/src/virtio/vulkan/vn_queue.h
+++ b/src/virtio/vulkan/vn_queue.h
@@ -88,6 +88,38 @@ struct vn_semaphore {
    struct vn_sync_payload permanent;
    struct vn_sync_payload temporary;
 
+   struct {
+      /* non-NULL if VN_PERF_NO_TIMELINE_SEM_FEEDBACK is disabled */
+      struct vn_feedback_slot *slot;
+
+      /* Lists of allocated vn_feedback_src
+       * The pending_src_list tracks vn_feedback_src slots that have
+       * not been signaled since the last submission cleanup.
+       * The free_src_list tracks vn_feedback_src slots that have
+       * signaled and can be reused.
+       * On submission prepare, used vn_feedback_src are moved from
+       * the free list to the pending list. On submission cleanup,
+       * the vn_feedback_src of any associated semaphores are checked
+       * and moved to the free list if they have signaled.
+       * vn_feedback_src slots are allocated on demand if the
+       * free_src_list is empty.
+       */
+      struct list_head pending_src_list;
+      struct list_head free_src_list;
+
+      /* Lock for accessing the free/pending src lists */
+      simple_mtx_t src_lists_mtx;
+
+      /* Cached counter value to track if an async sem wait call is needed */
+      uint64_t signaled_counter;
+
+      /* Lock for checking if an async sem wait call is needed based on
+       * the current counter value and signaled_counter, to ensure async
+       * wait order across threads.
+       */
+      simple_mtx_t async_wait_mtx;
+   } feedback;
+
    bool is_external;
 
    /* ring_idx of the last queue submission (only used for permanent
-- 
2.7.4