radv: Remove NV_mesh_shader API entrypoints.
author Timur Kristóf <timur.kristof@gmail.com>
Mon, 27 Mar 2023 14:09:08 +0000 (16:09 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 29 Mar 2023 15:08:55 +0000 (15:08 +0000)
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22139>

src/amd/vulkan/radv_cmd_buffer.c

index c1ad0a2..4a22ed4 100644 (file)
@@ -9067,159 +9067,6 @@ radv_after_draw(struct radv_cmd_buffer *cmd_buffer)
    radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
 }
 
-static struct radv_buffer
-radv_nv_mesh_indirect_bo(struct radv_cmd_buffer *cmd_buffer,
-                         struct radv_buffer *buffer, VkDeviceSize offset,
-                         uint32_t draw_count, uint32_t stride)
-{
-   /* Translates the indirect BO format used by NV_mesh_shader API
-    * to the BO format used by DRAW_INDIRECT / DRAW_INDIRECT_MULTI.
-    */
-
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
-   struct radeon_winsys *ws = cmd_buffer->device->ws;
-
-   const size_t src_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
-   const size_t dst_stride = sizeof(VkDrawIndirectCommand);
-   const size_t src_off_task_count = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount);
-   const size_t src_off_first_task = offsetof(VkDrawMeshTasksIndirectCommandNV, firstTask);
-   const size_t dst_off_vertex_count = offsetof(VkDrawIndirectCommand, vertexCount);
-   const size_t dst_off_first_vertex = offsetof(VkDrawIndirectCommand, firstVertex);
-
-   /* Fill the buffer with all zeroes except instanceCount = 1.
-    * This helps emit fewer copy packets below.
-    */
-   VkDrawIndirectCommand *fill_data = (VkDrawIndirectCommand *) alloca(dst_stride * draw_count);
-   const VkDrawIndirectCommand filler = { .instanceCount = 1 };
-   for (unsigned i = 0; i < draw_count; ++i)
-      fill_data[i] = filler;
-
-   /* We'll have to copy data from the API BO. */
-   uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
-   radv_cs_add_buffer(ws, cs, buffer->bo);
-
-   /* Allocate some space in the upload BO. */
-   unsigned out_offset;
-   radv_cmd_buffer_upload_data(cmd_buffer, dst_stride * draw_count, fill_data, &out_offset);
-   const uint64_t new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + out_offset;
-
-   ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 12 * draw_count + 2);
-
-   /* Copy data from the API BO so that the format is suitable for the
-    * indirect draw packet:
-    * - vertexCount = taskCount (copied here)
-    * - instanceCount = 1 (filled by CPU above)
-    * - firstVertex = firstTask (copied here)
-    * - firstInstance = 0 (filled by CPU above)
-    */
-   for (unsigned i = 0; i < draw_count; ++i) {
-      const uint64_t src_task_count = va + i * src_stride + src_off_task_count;
-      const uint64_t src_first_task = va + i * src_stride + src_off_first_task;
-      const uint64_t dst_vertex_count = new_va + i * dst_stride + dst_off_vertex_count;
-      const uint64_t dst_first_vertex = new_va + i * dst_stride + dst_off_first_vertex;
-
-      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                      COPY_DATA_WR_CONFIRM);
-      radeon_emit(cs, src_task_count);
-      radeon_emit(cs, src_task_count >> 32);
-      radeon_emit(cs, dst_vertex_count);
-      radeon_emit(cs, dst_vertex_count >> 32);
-
-      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                      COPY_DATA_WR_CONFIRM);
-      radeon_emit(cs, src_first_task);
-      radeon_emit(cs, src_first_task >> 32);
-      radeon_emit(cs, dst_first_vertex);
-      radeon_emit(cs, dst_first_vertex >> 32);
-   }
-
-   /* Wait for the copies to finish */
-   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-   radeon_emit(cs, 0);
-
-   /* The draw packet can now use this buffer: */
-   struct radv_buffer buf = *buffer;
-   buf.bo = cmd_buffer->upload.upload_bo;
-   buf.offset = out_offset;
-
-   assert(cmd_buffer->cs->cdw <= cdw_max);
-
-   return buf;
-}
-
-static struct radv_buffer
-radv_nv_task_indirect_bo(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
-                         VkDeviceSize offset, uint32_t draw_count, uint32_t stride)
-{
-   /* Translates the indirect BO format used by NV_mesh_shader API
-    * to the BO format used by DISPATCH_TASKMESH_INDIRECT_MULTI_ACE.
-    */
-
-   assert(draw_count);
-   static_assert(sizeof(VkDispatchIndirectCommand) == 12, "Incorrect size of taskmesh command.");
-
-   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
-   struct radeon_winsys *ws = cmd_buffer->device->ws;
-
-   const size_t src_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
-   const size_t dst_stride = sizeof(VkDispatchIndirectCommand);
-   const size_t src_off_task_count = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount);
-   const size_t dst_off_x = offsetof(VkDispatchIndirectCommand, x);
-
-   const unsigned new_disp_size = dst_stride * draw_count;
-
-   const uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
-   radv_cs_add_buffer(ws, cs, buffer->bo);
-
-   /* Fill the buffer with X=0, Y=1, Z=1. */
-   VkDispatchIndirectCommand *fill_data = (VkDispatchIndirectCommand *)alloca(new_disp_size);
-   for (unsigned i = 0; i < draw_count; ++i) {
-      fill_data[i].x = 0;
-      fill_data[i].y = 1;
-      fill_data[i].z = 1;
-   }
-
-   /* Allocate space in the upload BO. */
-   unsigned out_offset;
-   ASSERTED bool uploaded =
-      radv_cmd_buffer_upload_data(cmd_buffer, new_disp_size, fill_data, &out_offset);
-   const uint64_t new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + out_offset;
-   assert(uploaded);
-
-   /* Clamp draw count to fit the actual size of the buffer.
-    * This is to avoid potential out of bounds copies (eg. for draws with an indirect count buffer).
-    * The remaining indirect draws will stay filled with X=0, Y=1, Z=1 which is harmless.
-    */
-   draw_count = MIN2(draw_count, (buffer->vk.size - buffer->offset - offset) / src_stride);
-
-   ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 6 * draw_count + 2);
-
-   /* Copy taskCount from the NV API BO to the X dispatch size of the compatible BO. */
-   for (unsigned i = 0; i < draw_count; ++i) {
-      const uint64_t src_task_count = va + i * src_stride + src_off_task_count;
-      const uint64_t dst_x = new_va + i * dst_stride + dst_off_x;
-
-      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                         COPY_DATA_WR_CONFIRM);
-      radeon_emit(cs, src_task_count);
-      radeon_emit(cs, src_task_count >> 32);
-      radeon_emit(cs, dst_x);
-      radeon_emit(cs, dst_x >> 32);
-   }
-
-   assert(cs->cdw <= cdw_max);
-
-   /* The draw packet can now use this buffer: */
-   struct radv_buffer buf = *buffer;
-   buf.bo = cmd_buffer->upload.upload_bo;
-   buf.offset = out_offset;
-
-   return buf;
-}
-
 VKAPI_ATTR void VKAPI_CALL
 radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
              uint32_t firstVertex, uint32_t firstInstance)
@@ -9409,33 +9256,6 @@ radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer
 }
 
 VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawMeshTasksNV(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask)
-{
-   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-   struct radv_draw_info info;
-
-   info.count = taskCount;
-   info.instance_count = 1;
-   info.first_instance = 0;
-   info.stride = 0;
-   info.indexed = false;
-   info.strmout_buffer = NULL;
-   info.count_buffer = NULL;
-   info.indirect = NULL;
-
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1))
-      return;
-
-   if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
-      radv_emit_direct_taskmesh_draw_packets(cmd_buffer, taskCount, 1, 1, firstTask);
-   } else {
-      radv_emit_direct_mesh_draw_packet(cmd_buffer, taskCount, 1, 1, firstTask);
-   }
-
-   radv_after_draw(cmd_buffer);
-}
-
-VKAPI_ATTR void VKAPI_CALL
 radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
@@ -9463,64 +9283,6 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y,
 }
 
 VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer, VkBuffer _buffer,
-                                VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
-{
-   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
-
-   struct radv_draw_info info;
-
-   info.indirect = buffer;
-   info.indirect_offset = offset;
-   info.stride = stride;
-   info.count = drawCount;
-   info.strmout_buffer = NULL;
-   info.count_buffer = NULL;
-   info.indexed = false;
-   info.instance_count = 0;
-
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount))
-      return;
-
-   /* Indirect draw with mesh shader only:
-    * Use DRAW_INDIRECT / DRAW_INDIRECT_MULTI like normal indirect draws.
-    * Needed because DISPATCH_MESH_INDIRECT_MULTI doesn't support firstTask.
-    *
-    * Indirect draw with task + mesh shaders:
-    * Use DISPATCH_TASKMESH_INDIRECT_MULTI_ACE + DISPATCH_TASKMESH_GFX.
-    * These packets don't support firstTask so we implement that by
-    * reading the NV command's indirect buffer in the shader.
-    *
-    * The indirect BO layout from the NV_mesh_shader API is incompatible
-    * with AMD HW. To make it work, we allocate some space
-    * in the upload buffer and copy the data to it.
-    */
-
-   if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
-      uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
-      uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
-      struct radv_buffer buf =
-         radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride);
-      info.indirect = &buf;
-      info.indirect_offset = 0;
-      info.stride = sizeof(VkDispatchIndirectCommand);
-
-      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride);
-   } else {
-      struct radv_buffer buf =
-         radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride);
-      info.indirect = &buf;
-      info.indirect_offset = 0;
-      info.stride = sizeof(VkDrawIndirectCommand);
-
-      radv_emit_indirect_draw_packets(cmd_buffer, &info);
-   }
-
-   radv_after_draw(cmd_buffer);
-}
-
-VKAPI_ATTR void VKAPI_CALL
 radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                                  VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
 {
@@ -9554,54 +9316,6 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer
 }
 
 VKAPI_ATTR void VKAPI_CALL
-radv_CmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer, VkBuffer _buffer,
-                                     VkDeviceSize offset, VkBuffer _countBuffer,
-                                     VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
-                                     uint32_t stride)
-{
-   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
-   RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
-
-   struct radv_draw_info info;
-
-   info.indirect = buffer;
-   info.indirect_offset = offset;
-   info.stride = stride;
-   info.count = maxDrawCount;
-   info.strmout_buffer = NULL;
-   info.count_buffer = count_buffer;
-   info.count_buffer_offset = countBufferOffset;
-   info.indexed = false;
-   info.instance_count = 0;
-
-   if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount))
-      return;
-
-   if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
-      uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
-      uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
-      struct radv_buffer buf =
-         radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride);
-      info.indirect = &buf;
-      info.indirect_offset = 0;
-      info.stride = sizeof(VkDispatchIndirectCommand);
-
-      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride);
-   } else {
-      struct radv_buffer buf =
-         radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride);
-      info.indirect = &buf;
-      info.indirect_offset = 0;
-      info.stride = sizeof(VkDrawIndirectCommand);
-
-      radv_emit_indirect_draw_packets(cmd_buffer, &info);
-   }
-
-   radv_after_draw(cmd_buffer);
-}
-
-VKAPI_ATTR void VKAPI_CALL
 radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                                       VkDeviceSize offset, VkBuffer _countBuffer,
                                       VkDeviceSize countBufferOffset, uint32_t maxDrawCount,