From 723041a3ed75c78d705e42d3f679ce305b3df8ab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 11 Jan 2023 08:19:41 +1000 Subject: [PATCH] nvk/queue: add support for syncobjs and sparse binds Reviewed-by: Faith Ekstrand Part-of: --- src/nouveau/vulkan/nvk_physical_device.c | 17 ++ src/nouveau/vulkan/nvk_physical_device.h | 2 + src/nouveau/vulkan/nvk_queue.c | 12 +- src/nouveau/vulkan/nvk_queue.h | 2 + src/nouveau/vulkan/nvk_queue_drm_nouveau.c | 336 +++++++++++++++++++++++++++-- 5 files changed, 349 insertions(+), 20 deletions(-) diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 4b13396..6f7d24e 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -12,6 +12,7 @@ #include "util/mesa-sha1.h" #include "vulkan/runtime/vk_device.h" +#include "vulkan/runtime/vk_drm_syncobj.h" #include "vulkan/wsi/wsi_common.h" #include @@ -226,6 +227,7 @@ nvk_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, .maxDescriptorSetUpdateAfterBindInputAttachments = UINT32_MAX, .filterMinmaxSingleComponentFormats = true, .filterMinmaxImageComponentMapping = true, + .maxTimelineSemaphoreValueDifference = UINT64_MAX, }; snprintf(core_1_2.driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK"); @@ -364,6 +366,9 @@ nvk_get_device_extensions(const struct nv_device_info *info, .KHR_shader_non_semantic_info = true, .KHR_spirv_1_4 = true, .KHR_storage_buffer_storage_class = true, +#if NVK_NEW_UAPI == 1 + .KHR_timeline_semaphore = true, +#endif #ifdef NVK_USE_WSI_PLATFORM .KHR_swapchain = true, .KHR_swapchain_mutable_format = true, @@ -497,6 +502,9 @@ nvk_get_device_features(const struct nv_device_info *info, .uniformBufferStandardLayout = true, .separateDepthStencilLayouts = true, .hostQueryReset = true, +#if NVK_NEW_UAPI == 1 + .timelineSemaphore = true, +#endif .bufferDeviceAddress = true, .bufferDeviceAddressCaptureReplay = false, .bufferDeviceAddressMultiDevice = false, @@ -658,6 +666,10 @@ nvk_create_drm_physical_device(struct vk_instance *_instance, return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); const struct nv_device_info info = ws_dev->info; +#if NVK_NEW_UAPI == 1 + const struct vk_sync_type syncobj_sync_type = + vk_drm_syncobj_get_type(ws_dev->fd); +#endif nouveau_ws_device_destroy(ws_dev); @@ -764,7 +776,12 @@ nvk_create_drm_physical_device(struct vk_instance *_instance, } unsigned st_idx = 0; +#if NVK_NEW_UAPI == 1 + pdev->syncobj_sync_type = syncobj_sync_type; + pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type; +#else pdev->sync_types[st_idx++] = &nvk_bo_sync_type; +#endif pdev->sync_types[st_idx++] = NULL; assert(st_idx <= ARRAY_SIZE(pdev->sync_types)); pdev->vk.supported_sync_types = pdev->sync_types; diff --git a/src/nouveau/vulkan/nvk_physical_device.h b/src/nouveau/vulkan/nvk_physical_device.h index 0221780..b48d15a 100644 --- a/src/nouveau/vulkan/nvk_physical_device.h +++ b/src/nouveau/vulkan/nvk_physical_device.h @@ -7,6 +7,7 @@ #include "nv_device_info.h" #include "vulkan/runtime/vk_physical_device.h" +#include "vulkan/runtime/vk_sync.h" #include "wsi_common.h" @@ -29,6 +30,7 @@ struct nvk_physical_device { uint8_t mem_heap_cnt; uint8_t mem_type_cnt; + struct vk_sync_type syncobj_sync_type; const struct vk_sync_type *sync_types[2]; }; diff --git a/src/nouveau/vulkan/nvk_queue.c b/src/nouveau/vulkan/nvk_queue.c index d6eb74a..85e6844 100644 --- a/src/nouveau/vulkan/nvk_queue.c +++ b/src/nouveau/vulkan/nvk_queue.c @@ -318,7 +318,13 @@ nvk_queue_init(struct nvk_device *dev, struct 
nvk_queue *queue, nvk_queue_state_init(&queue->state); queue->vk.driver_submit = nvk_queue_submit; - +#if NVK_NEW_UAPI == 1 + int err = drmSyncobjCreate(dev->ws_dev->fd, 0, &queue->syncobj_handle); + if (err < 0) { + result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_init; + } +#endif void *empty_push_map; queue->empty_push = nouveau_ws_bo_new_mapped(dev->ws_dev, 4096, 0, NOUVEAU_WS_BO_GART | @@ -357,6 +363,10 @@ void nvk_queue_finish(struct nvk_device *dev, struct nvk_queue *queue) { nvk_queue_state_finish(dev, &queue->state); +#if NVK_NEW_UAPI == 1 + ASSERTED int err = drmSyncobjDestroy(dev->ws_dev->fd, queue->syncobj_handle); + assert(err == 0); +#endif nouveau_ws_bo_destroy(queue->empty_push); vk_queue_finish(&queue->vk); } diff --git a/src/nouveau/vulkan/nvk_queue.h b/src/nouveau/vulkan/nvk_queue.h index 21a1726..65c113c 100644 --- a/src/nouveau/vulkan/nvk_queue.h +++ b/src/nouveau/vulkan/nvk_queue.h @@ -47,6 +47,8 @@ struct nvk_queue { struct nouveau_ws_bo *empty_push; uint32_t empty_push_dw_count; + + uint32_t syncobj_handle; }; static inline struct nvk_device * diff --git a/src/nouveau/vulkan/nvk_queue_drm_nouveau.c b/src/nouveau/vulkan/nvk_queue_drm_nouveau.c index 44ca437..9ee8857 100644 --- a/src/nouveau/vulkan/nvk_queue_drm_nouveau.c +++ b/src/nouveau/vulkan/nvk_queue_drm_nouveau.c @@ -4,6 +4,8 @@ #include "nvk_cmd_buffer.h" #include "nvk_cmd_pool.h" #include "nvk_device.h" +#include "nvk_buffer.h" +#include "nvk_image.h" #include "nvk_device_memory.h" #include "nvk_physical_device.h" @@ -11,19 +13,38 @@ #include "drm-uapi/nouveau_drm.h" +#include "vk_drm_syncobj.h" + #include +#define NVK_PUSH_MAX_SYNCS 16 +#define NVK_PUSH_MAX_BINDS 4096 +#define NVK_PUSH_MAX_PUSH 4096 + struct push_builder { struct nvk_device *dev; +#if NVK_NEW_UAPI == 0 struct drm_nouveau_gem_pushbuf_bo req_bo[NOUVEAU_GEM_MAX_BUFFERS]; struct drm_nouveau_gem_pushbuf_push req_push[NOUVEAU_GEM_MAX_PUSH]; struct drm_nouveau_gem_pushbuf req; +#else + struct drm_nouveau_sync req_wait[NVK_PUSH_MAX_SYNCS]; + struct drm_nouveau_sync req_sig[NVK_PUSH_MAX_SYNCS]; + struct drm_nouveau_exec_push req_push[NVK_PUSH_MAX_PUSH]; + struct drm_nouveau_exec req; + struct drm_nouveau_vm_bind vmbind; + struct drm_nouveau_vm_bind_op bind_ops[NVK_PUSH_MAX_BINDS]; + bool is_vmbind; +#endif }; static void -push_builder_init(struct nvk_device *dev, struct push_builder *pb) +push_builder_init(struct nvk_device *dev, struct push_builder *pb, + bool is_vmbind) { pb->dev = dev; +#if NVK_NEW_UAPI == 0 + assert(!is_vmbind); pb->req = (struct drm_nouveau_gem_pushbuf) { .channel = dev->ws_ctx->channel, .nr_buffers = 0, @@ -31,6 +52,27 @@ push_builder_init(struct nvk_device *dev, struct push_builder *pb) .nr_push = 0, .push = (uintptr_t)&pb->req_push, }; +#else + pb->req = (struct drm_nouveau_exec) { + .channel = dev->ws_ctx->channel, + .push_count = 0, + .wait_count = 0, + .sig_count = 0, + .push_ptr = (uintptr_t)&pb->req_push, + .wait_ptr = (uintptr_t)&pb->req_wait, + .sig_ptr = (uintptr_t)&pb->req_sig, + }; + pb->vmbind = (struct drm_nouveau_vm_bind) { + .flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC, + .op_count = 0, + .op_ptr = (uintptr_t)&pb->bind_ops, + .wait_count = 0, + .sig_count = 0, + .wait_ptr = (uintptr_t)&pb->req_wait, + .sig_ptr = (uintptr_t)&pb->req_sig, + }; + pb->is_vmbind = is_vmbind; +#endif } static uint32_t @@ -38,6 +80,7 @@ push_add_bo(struct push_builder *pb, struct nouveau_ws_bo *bo, enum nouveau_ws_bo_map_flags flags) { +#if NVK_NEW_UAPI == 0 const uint32_t domain = (bo->flags & NOUVEAU_WS_BO_GART) ? 
NOUVEAU_GEM_DOMAIN_GART : pb->dev->ws_dev->local_mem_domain; @@ -66,12 +109,182 @@ push_add_bo(struct push_builder *pb, pb->req_bo[i].write_domains |= domain; return i; +#else + return 0; +#endif +} + +static void +push_add_sync_wait(struct push_builder *pb, + struct vk_sync_wait *wait) +{ +#if NVK_NEW_UAPI == 1 + struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(wait->sync); + assert(sync); + assert(pb->req.wait_count < NVK_PUSH_MAX_SYNCS); + pb->req_wait[pb->req.wait_count++] = (struct drm_nouveau_sync) { + .flags = wait->wait_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ : + DRM_NOUVEAU_SYNC_SYNCOBJ, + .handle = sync->syncobj, + .timeline_value = wait->wait_value, + }; +#endif +} + +static void +push_add_sync_signal(struct push_builder *pb, + struct vk_sync_signal *sig) +{ +#if NVK_NEW_UAPI == 0 + struct nvk_bo_sync *bo_sync = + container_of(sig->sync, struct nvk_bo_sync, sync); + + push_add_bo(pb, bo_sync->bo, NOUVEAU_WS_BO_RDWR); +#else + struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(sig->sync); + assert(sync); + assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS); + pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) { + .flags = sig->signal_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ : + DRM_NOUVEAU_SYNC_SYNCOBJ, + .handle = sync->syncobj, + .timeline_value = sig->signal_value, + }; +#endif +} + +#if NVK_NEW_UAPI == 1 +static void +push_add_buffer_bind(struct push_builder *pb, + VkSparseBufferMemoryBindInfo *bind_info) +{ + VK_FROM_HANDLE(nvk_buffer, buffer, bind_info->buffer); + for (unsigned i = 0; i < bind_info->bindCount; i++) { + const VkSparseMemoryBind *bind = &bind_info->pBinds[i]; + VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory); + + assert(bind->resourceOffset + bind->size <= buffer->vma_size_B); + assert(!mem || bind->memoryOffset + bind->size <= mem->vk.size); + + assert(pb->vmbind.op_count < NVK_PUSH_MAX_BINDS); + pb->bind_ops[pb->vmbind.op_count++] = (struct drm_nouveau_vm_bind_op) { + .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP : + DRM_NOUVEAU_VM_BIND_OP_UNMAP, + .handle = mem ? 
mem->bo->handle : 0, + .addr = buffer->addr + bind->resourceOffset, + .bo_offset = bind->memoryOffset, + .range = bind->size, + }; + } +} + +static void +push_add_image_plane_opaque_bind(struct push_builder *pb, + const struct nvk_image_plane *plane, + const VkSparseMemoryBind *bind, + uint64_t *image_plane_offset_B) +{ + *image_plane_offset_B = ALIGN_POT(*image_plane_offset_B, + plane->nil.align_B); + + /* The offset of the bind range within the image */ + uint64_t image_bind_offset_B = bind->resourceOffset; + uint64_t mem_bind_offset_B = bind->memoryOffset; + uint64_t bind_size_B = bind->size; + + /* If the bind starts before the plane, clamp from below */ + if (image_bind_offset_B < *image_plane_offset_B) { + /* The offset of the plane within the range being bound */ + const uint64_t bind_plane_offset_B = + *image_plane_offset_B - image_bind_offset_B; + + /* If this plane lies above the bound range, skip this bind */ + if (bind_plane_offset_B >= bind_size_B) + goto skip; + + image_bind_offset_B += bind_plane_offset_B; + mem_bind_offset_B += bind_plane_offset_B; + bind_size_B -= bind_plane_offset_B; + + assert(image_bind_offset_B == *image_plane_offset_B); + } + + /* The offset of the bind range within the plane */ + const uint64_t plane_bind_offset_B = + image_bind_offset_B - *image_plane_offset_B; + + /* The bound range lies above the plane */ + if (plane_bind_offset_B >= plane->vma_size_B) + goto skip; + + /* Clamp the size to fit inside the plane */ + bind_size_B = MIN2(bind_size_B, plane->vma_size_B - plane_bind_offset_B); + assert(bind_size_B > 0); + + VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory); + + assert(plane_bind_offset_B + bind_size_B <= plane->vma_size_B); + assert(!mem || mem_bind_offset_B + bind_size_B <= mem->vk.size); + + assert(pb->vmbind.op_count < NVK_PUSH_MAX_BINDS); + pb->bind_ops[pb->vmbind.op_count++] = (struct drm_nouveau_vm_bind_op) { + .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP : + DRM_NOUVEAU_VM_BIND_OP_UNMAP, + .handle = mem ? 
mem->bo->handle : 0, + .addr = plane->addr + plane_bind_offset_B, + .bo_offset = mem_bind_offset_B, + .range = bind_size_B, + .flags = plane->nil.pte_kind, + }; + +skip: + assert(plane->vma_size_B == plane->nil.size_B); + *image_plane_offset_B += plane->nil.size_B; +} + +static void +push_add_image_opaque_bind(struct push_builder *pb, + VkSparseImageOpaqueMemoryBindInfo *bind_info) +{ + VK_FROM_HANDLE(nvk_image, image, bind_info->image); + for (unsigned i = 0; i < bind_info->bindCount; i++) { + uint64_t image_plane_offset_B = 0; + for (unsigned plane = 0; plane < image->plane_count; plane++) { + push_add_image_plane_opaque_bind(pb, &image->planes[plane], + &bind_info->pBinds[i], + &image_plane_offset_B); + } + if (image->stencil_copy_temp.nil.size_B > 0) { + push_add_image_plane_opaque_bind(pb, &image->stencil_copy_temp, + &bind_info->pBinds[i], + &image_plane_offset_B); + } + } +} +#endif + +#if NVK_NEW_UAPI == 1 +static void +push_add_push(struct push_builder *pb, uint64_t addr, uint32_t range) +{ + assert((addr % 4) == 0 && (range % 4) == 0); + + if (range == 0) + return; + + assert(pb->req.push_count < NVK_PUSH_MAX_PUSH); + pb->req_push[pb->req.push_count++] = (struct drm_nouveau_exec_push) { + .va = addr, + .va_len = range, + }; } +#endif static void -push_add_push(struct push_builder *pb, struct nouveau_ws_bo *bo, - uint32_t offset, uint32_t range) +push_add_push_bo(struct push_builder *pb, struct nouveau_ws_bo *bo, + uint32_t offset, uint32_t range) { +#if NVK_NEW_UAPI == 0 assert((offset % 4) == 0 && (range % 4) == 0); if (range == 0) @@ -84,14 +297,38 @@ push_add_push(struct push_builder *pb, struct nouveau_ws_bo *bo, .offset = offset, .length = range, }; +#else + push_add_push(pb, bo->offset + offset, range); +#endif +} + +#if NVK_NEW_UAPI == 1 +static VkResult +bind_submit(struct push_builder *pb, struct nvk_queue *queue, bool sync) +{ + int err; + + pb->vmbind.wait_count = pb->req.wait_count; + pb->vmbind.sig_count = pb->req.sig_count; + err = drmCommandWriteRead(pb->dev->ws_dev->fd, + DRM_NOUVEAU_VM_BIND, + &pb->vmbind, sizeof(pb->vmbind)); + if (err) { + return vk_errorf(queue, VK_ERROR_UNKNOWN, + "DRM_NOUVEAU_VM_BIND failed: %m"); + } + return VK_SUCCESS; } +#endif static VkResult push_submit(struct push_builder *pb, struct nvk_queue *queue, bool sync) { - int err = drmCommandWriteRead(pb->dev->ws_dev->fd, - DRM_NOUVEAU_GEM_PUSHBUF, - &pb->req, sizeof(pb->req)); + int err; +#if NVK_NEW_UAPI == 0 + err = drmCommandWriteRead(pb->dev->ws_dev->fd, + DRM_NOUVEAU_GEM_PUSHBUF, + &pb->req, sizeof(pb->req)); if (err) { return vk_errorf(queue, VK_ERROR_UNKNOWN, "DRM_NOUVEAU_GEM_PUSHBUF failed: %m"); @@ -109,7 +346,36 @@ push_submit(struct push_builder *pb, struct nvk_queue *queue, bool sync) "DRM_NOUVEAU_GEM_CPU_PREP failed: %m"); } } - +#else + if (sync) { + assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS); + pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) { + .flags = DRM_NOUVEAU_SYNC_SYNCOBJ, + .handle = queue->syncobj_handle, + .timeline_value = 0, + }; + } + err = drmCommandWriteRead(pb->dev->ws_dev->fd, + DRM_NOUVEAU_EXEC, + &pb->req, sizeof(pb->req)); + if (err) { + VkResult result = VK_ERROR_UNKNOWN; + if (err == -ENODEV) + result = VK_ERROR_DEVICE_LOST; + return vk_errorf(queue, result, + "DRM_NOUVEAU_EXEC failed: %m"); + } + if (sync) { + err = drmSyncobjWait(pb->dev->ws_dev->fd, + &queue->syncobj_handle, 1, INT64_MAX, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + NULL); + if (err) { + return vk_errorf(queue, VK_ERROR_UNKNOWN, + "DRM_SYNCOBJ_WAIT failed: %m"); + } + 
} +#endif return VK_SUCCESS; } @@ -123,9 +389,9 @@ nvk_queue_submit_simple_drm_nouveau(struct nvk_queue *queue, struct nvk_device *dev = nvk_queue_device(queue); struct push_builder pb; - push_builder_init(dev, &pb); + push_builder_init(dev, &pb, false); - push_add_push(&pb, push_bo, 0, push_dw_count * 4); + push_add_push_bo(&pb, push_bo, 0, push_dw_count * 4); for (uint32_t i = 0; i < extra_bo_count; i++) push_add_bo(&pb, extra_bos[i], NOUVEAU_WS_BO_RDWR); @@ -142,7 +408,7 @@ push_add_queue_state(struct push_builder *pb, struct nvk_queue_state *qs) if (qs->slm.bo) push_add_bo(pb, qs->slm.bo, NOUVEAU_WS_BO_RDWR); if (qs->push.bo) - push_add_push(pb, qs->push.bo, 0, qs->push.dw_count * 4); + push_add_push_bo(pb, qs->push.bo, 0, qs->push.dw_count * 4); } static void @@ -161,30 +427,50 @@ nvk_queue_submit_drm_nouveau(struct nvk_queue *queue, { struct nvk_device *dev = nvk_queue_device(queue); struct push_builder pb; - VkResult result; - push_builder_init(dev, &pb); + const bool is_vmbind = submit->buffer_bind_count > 0 || + submit->image_opaque_bind_count > 0; + push_builder_init(dev, &pb, is_vmbind); + + for (uint32_t i = 0; i < submit->wait_count; i++) { + push_add_sync_wait(&pb, &submit->waits[i]); + } pthread_mutex_lock(&dev->mutex); for (uint32_t i = 0; i < submit->signal_count; i++) { - struct nvk_bo_sync *bo_sync = - container_of(submit->signals[i].sync, struct nvk_bo_sync, sync); + push_add_sync_signal(&pb, &submit->signals[i]); + } - push_add_bo(&pb, bo_sync->bo, NOUVEAU_WS_BO_RDWR); +#if NVK_NEW_UAPI == 1 + for (uint32_t i = 0; i < submit->buffer_bind_count; i++) { + push_add_buffer_bind(&pb, &submit->buffer_binds[i]); } - if (submit->command_buffer_count == 0) { - push_add_push(&pb, queue->empty_push, 0, queue->empty_push_dw_count * 4); + for (uint32_t i = 0; i < submit->image_opaque_bind_count; i++) { + push_add_image_opaque_bind(&pb, &submit->image_opaque_binds[i]); + } +#else + assert(submit->buffer_bind_count == 0); + assert(submit->image_opaque_bind_count == 0); +#endif + + if (is_vmbind) { + assert(submit->command_buffer_count == 0); + } else if (submit->command_buffer_count == 0) { + push_add_push_bo(&pb, queue->empty_push, 0, + queue->empty_push_dw_count * 4); } else { push_add_queue_state(&pb, &queue->state); push_add_heap(&pb, &dev->shader_heap); +#if NVK_NEW_UAPI == 0 list_for_each_entry(struct nvk_device_memory, mem, &dev->memory_objects, link) { push_add_bo(&pb, mem->bo, NOUVEAU_WS_BO_RDWR); } +#endif for (unsigned i = 0; i < submit->command_buffer_count; i++) { struct nvk_cmd_buffer *cmd = @@ -194,14 +480,25 @@ nvk_queue_submit_drm_nouveau(struct nvk_queue *queue, push_add_bo(&pb, bo->bo, NOUVEAU_WS_BO_RD); util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, push) - push_add_push(&pb, push->bo, push->bo_offset, push->range); + push_add_push_bo(&pb, push->bo, push->bo_offset, push->range); util_dynarray_foreach(&cmd->bo_refs, struct nvk_cmd_bo_ref, ref) push_add_bo(&pb, ref->bo, NOUVEAU_WS_BO_RDWR); } } - result = push_submit(&pb, queue, sync); + VkResult result; + if (is_vmbind) { +#if NVK_NEW_UAPI == 1 + result = bind_submit(&pb, queue, sync); +#else + unreachable("Sparse is not supported on the old uAPI"); +#endif + } else { + result = push_submit(&pb, queue, sync); + } + +#if NVK_NEW_UAPI == 0 if (result == VK_SUCCESS) { for (uint32_t i = 0; i < submit->wait_count; i++) { struct nvk_bo_sync *bo_sync = @@ -215,6 +512,7 @@ nvk_queue_submit_drm_nouveau(struct nvk_queue *queue, bo_sync->state = NVK_BO_SYNC_STATE_SUBMITTED; } } +#endif 
pthread_cond_broadcast(&dev->queue_submit); pthread_mutex_unlock(&dev->mutex); -- 2.7.4
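
Editor's note (not part of the patch): for readers unfamiliar with the new nouveau uAPI this change targets, below is a minimal, hypothetical sketch of a single asynchronous sparse unbind issued through DRM_NOUVEAU_VM_BIND, mirroring how bind_submit() above fills and submits struct drm_nouveau_vm_bind. It assumes `fd` is an open nouveau DRM fd and `syncobj` was created with drmSyncobjCreate() as in nvk_queue_init(); the function name and error handling are illustrative only, not part of the driver.

    #include <stdint.h>
    #include <xf86drm.h>
    #include "drm-uapi/nouveau_drm.h"

    static int
    unbind_range_async(int fd, uint64_t va, uint64_t range, uint32_t syncobj)
    {
       /* One unmap operation covering [va, va + range). */
       struct drm_nouveau_vm_bind_op op = {
          .op = DRM_NOUVEAU_VM_BIND_OP_UNMAP,
          .addr = va,
          .range = range,
       };

       /* Signal a (binary) syncobj when the async unbind completes. */
       struct drm_nouveau_sync sig = {
          .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
          .handle = syncobj,
       };

       struct drm_nouveau_vm_bind bind = {
          .flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
          .op_count = 1,
          .op_ptr = (uintptr_t)&op,
          .sig_count = 1,
          .sig_ptr = (uintptr_t)&sig,
       };

       /* Same ioctl path bind_submit() uses; returns 0 on success. */
       return drmCommandWriteRead(fd, DRM_NOUVEAU_VM_BIND, &bind, sizeof(bind));
    }

A caller would wait on the syncobj (e.g. drmSyncobjWait()) or chain it into a later DRM_NOUVEAU_EXEC as a wait entry, which is exactly how the queue code above orders sparse binds against subsequent command-buffer submissions.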