From 945384b504c5ba249ad6d7c673a62d8ab5236e38 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Mon, 27 Mar 2023 16:09:08 +0200 Subject: [PATCH] radv: Remove NV_mesh_shader API entrypoints. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 286 --------------------------------------- 1 file changed, 286 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c1ad0a2..4a22ed4 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -9067,159 +9067,6 @@ radv_after_draw(struct radv_cmd_buffer *cmd_buffer) radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH); } -static struct radv_buffer -radv_nv_mesh_indirect_bo(struct radv_cmd_buffer *cmd_buffer, - struct radv_buffer *buffer, VkDeviceSize offset, - uint32_t draw_count, uint32_t stride) -{ - /* Translates the indirect BO format used by NV_mesh_shader API - * to the BO format used by DRAW_INDIRECT / DRAW_INDIRECT_MULTI. - */ - - struct radeon_cmdbuf *cs = cmd_buffer->cs; - struct radeon_winsys *ws = cmd_buffer->device->ws; - - const size_t src_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV)); - const size_t dst_stride = sizeof(VkDrawIndirectCommand); - const size_t src_off_task_count = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount); - const size_t src_off_first_task = offsetof(VkDrawMeshTasksIndirectCommandNV, firstTask); - const size_t dst_off_vertex_count = offsetof(VkDrawIndirectCommand, vertexCount); - const size_t dst_off_first_vertex = offsetof(VkDrawIndirectCommand, firstVertex); - - /* Fill the buffer with all zeroes except instanceCount = 1. - * This helps emit fewer copy packets below. - */ - VkDrawIndirectCommand *fill_data = (VkDrawIndirectCommand *) alloca(dst_stride * draw_count); - const VkDrawIndirectCommand filler = { .instanceCount = 1 }; - for (unsigned i = 0; i < draw_count; ++i) - fill_data[i] = filler; - - /* We'll have to copy data from the API BO. */ - uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; - radv_cs_add_buffer(ws, cs, buffer->bo); - - /* Allocate some space in the upload BO. */ - unsigned out_offset; - radv_cmd_buffer_upload_data(cmd_buffer, dst_stride * draw_count, fill_data, &out_offset); - const uint64_t new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + out_offset; - - ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 12 * draw_count + 2); - - /* Copy data from the API BO so that the format is suitable for the - * indirect draw packet: - * - vertexCount = taskCount (copied here) - * - instanceCount = 1 (filled by CPU above) - * - firstVertex = firstTask (copied here) - * - firstInstance = 0 (filled by CPU above) - */ - for (unsigned i = 0; i < draw_count; ++i) { - const uint64_t src_task_count = va + i * src_stride + src_off_task_count; - const uint64_t src_first_task = va + i * src_stride + src_off_first_task; - const uint64_t dst_vertex_count = new_va + i * dst_stride + dst_off_vertex_count; - const uint64_t dst_first_vertex = new_va + i * dst_stride + dst_off_first_vertex; - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, src_task_count); - radeon_emit(cs, src_task_count >> 32); - radeon_emit(cs, dst_vertex_count); - radeon_emit(cs, dst_vertex_count >> 32); - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, src_first_task); - radeon_emit(cs, src_first_task >> 32); - radeon_emit(cs, dst_first_vertex); - radeon_emit(cs, dst_first_vertex >> 32); - } - - /* Wait for the copies to finish */ - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cs, 0); - - /* The draw packet can now use this buffer: */ - struct radv_buffer buf = *buffer; - buf.bo = cmd_buffer->upload.upload_bo; - buf.offset = out_offset; - - assert(cmd_buffer->cs->cdw <= cdw_max); - - return buf; -} - -static struct radv_buffer -radv_nv_task_indirect_bo(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, - VkDeviceSize offset, uint32_t draw_count, uint32_t stride) -{ - /* Translates the indirect BO format used by NV_mesh_shader API - * to the BO format used by DISPATCH_TASKMESH_INDIRECT_MULTI_ACE. - */ - - assert(draw_count); - static_assert(sizeof(VkDispatchIndirectCommand) == 12, "Incorrect size of taskmesh command."); - - struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs; - struct radeon_winsys *ws = cmd_buffer->device->ws; - - const size_t src_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV)); - const size_t dst_stride = sizeof(VkDispatchIndirectCommand); - const size_t src_off_task_count = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount); - const size_t dst_off_x = offsetof(VkDispatchIndirectCommand, x); - - const unsigned new_disp_size = dst_stride * draw_count; - - const uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; - radv_cs_add_buffer(ws, cs, buffer->bo); - - /* Fill the buffer with X=0, Y=1, Z=1. */ - VkDispatchIndirectCommand *fill_data = (VkDispatchIndirectCommand *)alloca(new_disp_size); - for (unsigned i = 0; i < draw_count; ++i) { - fill_data[i].x = 0; - fill_data[i].y = 1; - fill_data[i].z = 1; - } - - /* Allocate space in the upload BO. */ - unsigned out_offset; - ASSERTED bool uploaded = - radv_cmd_buffer_upload_data(cmd_buffer, new_disp_size, fill_data, &out_offset); - const uint64_t new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + out_offset; - assert(uploaded); - - /* Clamp draw count to fit the actual size of the buffer. - * This is to avoid potential out of bounds copies (eg. for draws with an indirect count buffer). - * The remaining indirect draws will stay filled with X=0, Y=1, Z=1 which is harmless. - */ - draw_count = MIN2(draw_count, (buffer->vk.size - buffer->offset - offset) / src_stride); - - ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 6 * draw_count + 2); - - /* Copy taskCount from the NV API BO to the X dispatch size of the compatible BO. */ - for (unsigned i = 0; i < draw_count; ++i) { - const uint64_t src_task_count = va + i * src_stride + src_off_task_count; - const uint64_t dst_x = new_va + i * dst_stride + dst_off_x; - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, src_task_count); - radeon_emit(cs, src_task_count >> 32); - radeon_emit(cs, dst_x); - radeon_emit(cs, dst_x >> 32); - } - - assert(cs->cdw <= cdw_max); - - /* The draw packet can now use this buffer: */ - struct radv_buffer buf = *buffer; - buf.bo = cmd_buffer->upload.upload_bo; - buf.offset = out_offset; - - return buf; -} - VKAPI_ATTR void VKAPI_CALL radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) @@ -9409,33 +9256,6 @@ radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawMeshTasksNV(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_draw_info info; - - info.count = taskCount; - info.instance_count = 1; - info.first_instance = 0; - info.stride = 0; - info.indexed = false; - info.strmout_buffer = NULL; - info.count_buffer = NULL; - info.indirect = NULL; - - if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1)) - return; - - if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { - radv_emit_direct_taskmesh_draw_packets(cmd_buffer, taskCount, 1, 1, firstTask); - } else { - radv_emit_direct_mesh_draw_packet(cmd_buffer, taskCount, 1, 1, firstTask); - } - - radv_after_draw(cmd_buffer); -} - -VKAPI_ATTR void VKAPI_CALL radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); @@ -9463,64 +9283,6 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer, VkBuffer _buffer, - VkDeviceSize offset, uint32_t drawCount, uint32_t stride) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - - struct radv_draw_info info; - - info.indirect = buffer; - info.indirect_offset = offset; - info.stride = stride; - info.count = drawCount; - info.strmout_buffer = NULL; - info.count_buffer = NULL; - info.indexed = false; - info.instance_count = 0; - - if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount)) - return; - - /* Indirect draw with mesh shader only: - * Use DRAW_INDIRECT / DRAW_INDIRECT_MULTI like normal indirect draws. - * Needed because DISPATCH_MESH_INDIRECT_MULTI doesn't support firstTask. - * - * Indirect draw with task + mesh shaders: - * Use DISPATCH_TASKMESH_INDIRECT_MULTI_ACE + DISPATCH_TASKMESH_GFX. - * These packets don't support firstTask so we implement that by - * reading the NV command's indirect buffer in the shader. - * - * The indirect BO layout from the NV_mesh_shader API is incompatible - * with AMD HW. To make it work, we allocate some space - * in the upload buffer and copy the data to it. - */ - - if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { - uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; - uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV)); - struct radv_buffer buf = - radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride); - info.indirect = &buf; - info.indirect_offset = 0; - info.stride = sizeof(VkDispatchIndirectCommand); - - radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride); - } else { - struct radv_buffer buf = - radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride); - info.indirect = &buf; - info.indirect_offset = 0; - info.stride = sizeof(VkDrawIndirectCommand); - - radv_emit_indirect_draw_packets(cmd_buffer, &info); - } - - radv_after_draw(cmd_buffer); -} - -VKAPI_ATTR void VKAPI_CALL radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride) { @@ -9554,54 +9316,6 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer } VKAPI_ATTR void VKAPI_CALL -radv_CmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer, VkBuffer _buffer, - VkDeviceSize offset, VkBuffer _countBuffer, - VkDeviceSize countBufferOffset, uint32_t maxDrawCount, - uint32_t stride) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); - - struct radv_draw_info info; - - info.indirect = buffer; - info.indirect_offset = offset; - info.stride = stride; - info.count = maxDrawCount; - info.strmout_buffer = NULL; - info.count_buffer = count_buffer; - info.count_buffer_offset = countBufferOffset; - info.indexed = false; - info.instance_count = 0; - - if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount)) - return; - - if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { - uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; - uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV)); - struct radv_buffer buf = - radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride); - info.indirect = &buf; - info.indirect_offset = 0; - info.stride = sizeof(VkDispatchIndirectCommand); - - radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride); - } else { - struct radv_buffer buf = - radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride); - info.indirect = &buf; - info.indirect_offset = 0; - info.stride = sizeof(VkDrawIndirectCommand); - - radv_emit_indirect_draw_packets(cmd_buffer, &info); - } - - radv_after_draw(cmd_buffer); -} - -VKAPI_ATTR void VKAPI_CALL radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, VkBuffer _countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, -- 2.7.4