From: Dave Airlie Date: Mon, 13 Jun 2022 23:14:05 +0000 (+1000) Subject: nvk: add support for preamble and tls allocation. X-Git-Tag: upstream/23.3.3~4535 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fa32ee409a04616d397e827699425d3730fcec17;p=platform%2Fupstream%2Fmesa.git nvk: add support for preamble and tls allocation. This adds support for per-queue allocations that are reallocated upwards if the submitted cmd needs more space. This is used to implement the tls allocations for compute. Part-of: --- diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c index c3f26df..91024b7 100644 --- a/src/nouveau/vulkan/nvk_cmd_buffer.c +++ b/src/nouveau/vulkan/nvk_cmd_buffer.c @@ -6,6 +6,8 @@ #include "nouveau_push.h" +#include "nvk_cla0c0.h" + static void nvk_destroy_cmd_buffer(struct nvk_cmd_buffer *cmd_buffer) { @@ -217,6 +219,30 @@ nvk_ResetCommandBuffer(VkCommandBuffer commandBuffer, return nvk_reset_cmd_buffer(cmd_buffer); } +static uint64_t +calc_tls_size(struct nvk_device *device, + uint32_t lpos, uint32_t lneg, uint32_t cstack) +{ + uint64_t size = (lpos + lneg) * 32 + cstack; + + assert (size < (1 << 20)); + + size *= 64; /* max warps */ + size = align(size, 0x8000); + size *= device->pdev->dev->mp_count; + + size = align(size, 1 << 17); + return size; +} + +static void +nve4_begin_compute(struct nvk_cmd_buffer *cmd) +{ + struct nvk_device *dev = (struct nvk_device *)cmd->vk.base.device; + + cmd->tls_space_needed = calc_tls_size(dev, 128 * 16, 0, 0x200); +} + VKAPI_ATTR VkResult VKAPI_CALL nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo) @@ -230,6 +256,13 @@ nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer, else cmd->reset_on_submit = false; + struct nvk_device *dev = (struct nvk_device *)cmd->vk.base.device; + struct nvk_physical_device *pdev = dev->pdev; + + /* this could be made optional for non-compute cmdbuffers */ + if (pdev->dev->cls >= 0xa0) + nve4_begin_compute(cmd); + return VK_SUCCESS; } diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h index 3145ee2..96b9ac4 100644 --- a/src/nouveau/vulkan/nvk_cmd_buffer.h +++ b/src/nouveau/vulkan/nvk_cmd_buffer.h @@ -52,6 +52,8 @@ struct nvk_cmd_buffer { struct nouveau_ws_push *push; bool reset_on_submit; + + uint64_t tls_space_needed; }; VkResult nvk_reset_cmd_buffer(struct nvk_cmd_buffer *cmd_buffer); diff --git a/src/nouveau/vulkan/nvk_device.c b/src/nouveau/vulkan/nvk_device.c index 884a946..b3aa6a5 100644 --- a/src/nouveau/vulkan/nvk_device.c +++ b/src/nouveau/vulkan/nvk_device.c @@ -10,12 +10,112 @@ #include "vulkan/wsi/wsi_common.h" +#include "nvk_cla0c0.h" +#include "cla1c0.h" +#include "nvk_clc3c0.h" + +static VkResult +nvk_update_preamble_push(struct nvk_queue_state *qs, struct nvk_device *dev, + const struct nvk_queue_alloc_info *needs) +{ + struct nouveau_ws_bo *tls_bo = qs->tls_bo; + VkResult result; + if (needs->tls_size > qs->alloc_info.tls_size) { + tls_bo = nouveau_ws_bo_new(dev->pdev->dev, + needs->tls_size, (1 << 17), NOUVEAU_WS_BO_LOCAL); + if (!tls_bo) { + result = VK_ERROR_OUT_OF_DEVICE_MEMORY; + goto fail; + } + } + + if (tls_bo != qs->tls_bo) { + if (qs->tls_bo) + nouveau_ws_bo_destroy(qs->tls_bo); + qs->tls_bo = tls_bo; + } + + struct nouveau_ws_push *push = nouveau_ws_push_new(dev->pdev->dev, 256); + + nouveau_ws_push_ref(push, qs->tls_bo, NOUVEAU_WS_BO_RDWR); + P_MTHD(push, NVA0C0, SET_SHADER_LOCAL_MEMORY_A); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_A(push, qs->tls_bo->offset >> 32); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_B(push, qs->tls_bo->offset & 0xffffffff); + + uint64_t temp_size = qs->tls_bo->size / dev->pdev->dev->mp_count; + P_MTHD(push, NVA0C0, SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A(push, temp_size >> 32); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_B(push, temp_size & ~0x7fff); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_C(push, 0xff); + + if (dev->pdev->dev->cls < 0xc3) { + P_MTHD(push, NVA0C0, SET_SHADER_LOCAL_MEMORY_THROTTLED_A); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_A(push, temp_size >> 32); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_B(push, temp_size & ~0x7fff); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_C(push, 0xff); + + P_MTHD(push, NVA0C0, SET_SHADER_LOCAL_MEMORY_WINDOW); + P_NVA0C0_SET_SHADER_LOCAL_MEMORY_WINDOW(push, 0xff << 24); + + P_MTHD(push, NVA0C0, SET_SHADER_SHARED_MEMORY_WINDOW); + P_NVA0C0_SET_SHADER_SHARED_MEMORY_WINDOW(push, 0xfe << 24); + + // TODO CODE_ADDRESS_HIGH + } else { + uint64_t temp = 0xfeULL << 24; + + P_MTHD(push, NVC3C0, SET_SHADER_SHARED_MEMORY_WINDOW_A); + P_NVC3C0_SET_SHADER_SHARED_MEMORY_WINDOW_A(push, temp >> 32); + P_NVC3C0_SET_SHADER_SHARED_MEMORY_WINDOW_B(push, temp & 0xffffffff); + + temp = 0xffULL << 24; + P_MTHD(push, NVC3C0, SET_SHADER_LOCAL_MEMORY_WINDOW_A); + P_NVC3C0_SET_SHADER_LOCAL_MEMORY_WINDOW_A(push, temp >> 32); + P_NVC3C0_SET_SHADER_LOCAL_MEMORY_WINDOW_B(push, temp & 0xffffffff); + } + + P_MTHD(push, NVA0C0, SET_SPA_VERSION); + P_NVA0C0_SET_SPA_VERSION(push, { .major = dev->pdev->dev->cls >= 0xa1 ? 0x4 : 0x3 }); + + if (qs->push) + nouveau_ws_push_destroy(qs->push); + qs->push = push; + return 0; + fail: + return vk_error(qs, result); +} + static VkResult -nvk_queue_submit(struct vk_queue *queue, struct vk_queue_submit *submission) +nvk_update_preambles(struct nvk_queue_state *qs, struct nvk_device *device, + struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count) { - struct nvk_device *device = container_of(queue->base.device, struct nvk_device, vk); + struct nvk_queue_alloc_info needs = { 0 }; + for (uint32_t i = 0; i < cmd_buffer_count; i++) { + struct nvk_cmd_buffer *cmd = (struct nvk_cmd_buffer *)cmd_buffers[i]; + needs.tls_size = MAX2(needs.tls_size, cmd->tls_space_needed); + } + + if (needs.tls_size == qs->alloc_info.tls_size) + return VK_SUCCESS; + return nvk_update_preamble_push(qs, device, &needs); +} + +static VkResult +nvk_queue_submit(struct vk_queue *vkqueue, struct vk_queue_submit *submission) +{ + struct nvk_device *device = container_of(vkqueue->base.device, struct nvk_device, vk); + struct nvk_queue *queue = container_of(vkqueue, struct nvk_queue, vk); + VkResult result; + + result = nvk_update_preambles(&queue->state, device, submission->command_buffers, + submission->command_buffer_count); + if (result != VK_SUCCESS) + return result; pthread_mutex_lock(&device->mutex); + + if (queue->state.push) + nouveau_ws_push_submit(queue->state.push, device->pdev->dev, device->ctx); for (unsigned i = 0; i < submission->command_buffer_count; i++) { struct nvk_cmd_buffer *cmd = (struct nvk_cmd_buffer *)submission->command_buffers[i]; @@ -77,7 +177,7 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice, goto fail_init; } - result = vk_queue_init(&device->queue, &device->vk, &pCreateInfo->pQueueCreateInfos[0], 0); + result = vk_queue_init(&device->queue.vk, &device->vk, &pCreateInfo->pQueueCreateInfos[0], 0); if (result != VK_SUCCESS) goto fail_ctx; @@ -103,7 +203,7 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice, } pthread_condattr_destroy(&condattr); - device->queue.driver_submit = nvk_queue_submit; + device->queue.vk.driver_submit = nvk_queue_submit; device->pdev = physical_device; @@ -114,7 +214,7 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice, fail_mutex: pthread_mutex_destroy(&device->mutex); fail_queue: - vk_queue_finish(&device->queue); + vk_queue_finish(&device->queue.vk); fail_ctx: nouveau_ws_context_destroy(device->ctx); fail_init: @@ -132,9 +232,13 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) if (!device) return; + if (device->queue.state.push) + nouveau_ws_push_destroy(device->queue.state.push); + if (device->queue.state.tls_bo) + nouveau_ws_bo_destroy(device->queue.state.tls_bo); pthread_cond_destroy(&device->queue_submit); pthread_mutex_destroy(&device->mutex); - vk_queue_finish(&device->queue); + vk_queue_finish(&device->queue.vk); vk_device_finish(&device->vk); nouveau_ws_context_destroy(device->ctx); vk_free(&device->vk.alloc, device); diff --git a/src/nouveau/vulkan/nvk_device.h b/src/nouveau/vulkan/nvk_device.h index d32444c..e561ebb 100644 --- a/src/nouveau/vulkan/nvk_device.h +++ b/src/nouveau/vulkan/nvk_device.h @@ -9,13 +9,30 @@ struct novueau_ws_context; struct nvk_physical_device; +struct nvk_queue_alloc_info { + uint64_t tls_size; +}; + +struct nvk_queue_state { + struct nvk_queue_alloc_info alloc_info; + struct nouveau_ws_bo *tls_bo; + + struct nouveau_ws_push *push; +}; + +struct nvk_queue { + struct vk_queue vk; + + struct nvk_queue_state state; +}; + struct nvk_device { struct vk_device vk; struct nvk_physical_device *pdev; struct nouveau_ws_context *ctx; - struct vk_queue queue; + struct nvk_queue queue; pthread_mutex_t mutex; pthread_cond_t queue_submit;