pvr: Use common queue submit implementation
authorJarred Davies <jarred.davies@imgtec.com>
Tue, 17 Jan 2023 21:43:42 +0000 (21:43 +0000)
committerMarge Bot <emma+marge@anholt.net>
Tue, 28 Feb 2023 21:39:49 +0000 (21:39 +0000)
This commit also simplifies the synchronization code, taking advantage of
the implicit guarantee the FW gives the driver that jobs submitted to the
same context are run in submission order.

Signed-off-by: Jarred Davies <jarred.davies@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21577>

19 files changed:
src/imagination/vulkan/pvr_job_compute.c
src/imagination/vulkan/pvr_job_compute.h
src/imagination/vulkan/pvr_job_render.c
src/imagination/vulkan/pvr_job_render.h
src/imagination/vulkan/pvr_job_transfer.c
src/imagination/vulkan/pvr_job_transfer.h
src/imagination/vulkan/pvr_private.h
src/imagination/vulkan/pvr_queue.c
src/imagination/vulkan/winsys/pvr_winsys.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_null.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_transfer.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_sync.c

index ef8d227..2f81499 100644 (file)
@@ -179,10 +179,7 @@ pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info,
 static void pvr_compute_job_ws_submit_info_init(
    struct pvr_compute_ctx *ctx,
    struct pvr_sub_cmd_compute *sub_cmd,
-   struct vk_sync *barrier,
-   struct vk_sync **waits,
-   uint32_t wait_count,
-   uint32_t *stage_flags,
+   struct vk_sync *wait,
    struct pvr_winsys_compute_submit_info *submit_info)
 {
    const struct pvr_device *const device = ctx->device;
@@ -193,11 +190,7 @@ static void pvr_compute_job_ws_submit_info_init(
    submit_info->frame_num = device->global_queue_present_count;
    submit_info->job_num = device->global_cmd_buffer_submit_count;
 
-   submit_info->barrier = barrier;
-
-   submit_info->waits = waits;
-   submit_info->wait_count = wait_count;
-   submit_info->stage_flags = stage_flags;
+   submit_info->wait = wait;
 
    pvr_submit_info_stream_init(ctx, sub_cmd, submit_info);
    pvr_submit_info_ext_stream_init(ctx, submit_info);
@@ -206,22 +199,13 @@ static void pvr_compute_job_ws_submit_info_init(
 
 VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
                                 struct pvr_sub_cmd_compute *sub_cmd,
-                                struct vk_sync *barrier,
-                                struct vk_sync **waits,
-                                uint32_t wait_count,
-                                uint32_t *stage_flags,
+                                struct vk_sync *wait,
                                 struct vk_sync *signal_sync)
 {
    struct pvr_winsys_compute_submit_info submit_info;
    struct pvr_device *device = ctx->device;
 
-   pvr_compute_job_ws_submit_info_init(ctx,
-                                       sub_cmd,
-                                       barrier,
-                                       waits,
-                                       wait_count,
-                                       stage_flags,
-                                       &submit_info);
+   pvr_compute_job_ws_submit_info_init(ctx, sub_cmd, wait, &submit_info);
 
    if (PVR_IS_DEBUG_SET(DUMP_CONTROL_STREAM)) {
       pvr_csb_dump(&sub_cmd->control_stream,
index 72dfcd0..ed715d5 100644 (file)
@@ -33,10 +33,7 @@ struct vk_sync;
 
 VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
                                 struct pvr_sub_cmd_compute *sub_cmd,
-                                struct vk_sync *barrier,
-                                struct vk_sync **waits,
-                                uint32_t wait_count,
-                                uint32_t *stage_flags,
+                                struct vk_sync *wait,
                                 struct vk_sync *signal_sync);
 
 #endif /* PVR_JOB_COMPUTE_H */
index 32f49cb..a5baa38 100644 (file)
@@ -1279,10 +1279,13 @@ static void pvr_geom_state_flags_init(const struct pvr_render_job *const job,
 static void
 pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
                                       struct pvr_render_job *job,
+                                      struct vk_sync *wait,
                                       struct pvr_winsys_geometry_state *state)
 {
    pvr_geom_state_stream_init(ctx, job, state);
    pvr_geom_state_stream_ext_init(ctx, job, state);
+
+   state->wait = wait;
    pvr_geom_state_flags_init(job, &state->flags);
 }
 
@@ -1687,21 +1690,21 @@ static void pvr_frag_state_flags_init(const struct pvr_render_job *const job,
 static void
 pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
                                       struct pvr_render_job *job,
+                                      struct vk_sync *wait,
                                       struct pvr_winsys_fragment_state *state)
 {
    pvr_frag_state_stream_init(ctx, job, state);
    pvr_frag_state_stream_ext_init(ctx, job, state);
+
+   state->wait = wait;
    pvr_frag_state_flags_init(job, &state->flags);
 }
 
 static void pvr_render_job_ws_submit_info_init(
    struct pvr_render_ctx *ctx,
    struct pvr_render_job *job,
-   struct vk_sync *barrier_geom,
-   struct vk_sync *barrier_frag,
-   struct vk_sync **waits,
-   uint32_t wait_count,
-   uint32_t *stage_flags,
+   struct vk_sync *wait_geom,
+   struct vk_sync *wait_frag,
    struct pvr_winsys_render_submit_info *submit_info)
 {
    memset(submit_info, 0, sizeof(*submit_info));
@@ -1712,29 +1715,25 @@ static void pvr_render_job_ws_submit_info_init(
    submit_info->frame_num = ctx->device->global_queue_present_count;
    submit_info->job_num = ctx->device->global_cmd_buffer_submit_count;
 
-   submit_info->barrier_geom = barrier_geom;
-
-   submit_info->waits = waits;
-   submit_info->wait_count = wait_count;
-   submit_info->stage_flags = stage_flags;
-
-   pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry);
+   pvr_render_job_ws_geometry_state_init(ctx,
+                                         job,
+                                         wait_geom,
+                                         &submit_info->geometry);
 
    if (job->run_frag) {
       submit_info->run_frag = true;
-      submit_info->barrier_frag = barrier_frag;
 
-      pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment);
+      pvr_render_job_ws_fragment_state_init(ctx,
+                                            job,
+                                            wait_frag,
+                                            &submit_info->fragment);
    }
 }
 
 VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
                                struct pvr_render_job *job,
-                               struct vk_sync *barrier_geom,
-                               struct vk_sync *barrier_frag,
-                               struct vk_sync **waits,
-                               uint32_t wait_count,
-                               uint32_t *stage_flags,
+                               struct vk_sync *wait_geom,
+                               struct vk_sync *wait_frag,
                                struct vk_sync *signal_sync_geom,
                                struct vk_sync *signal_sync_frag)
 {
@@ -1745,11 +1744,8 @@ VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
 
    pvr_render_job_ws_submit_info_init(ctx,
                                       job,
-                                      barrier_geom,
-                                      barrier_frag,
-                                      waits,
-                                      wait_count,
-                                      stage_flags,
+                                      wait_geom,
+                                      wait_frag,
                                       &submit_info);
 
    if (PVR_IS_DEBUG_SET(DUMP_CONTROL_STREAM)) {
index e424222..406aa6d 100644 (file)
@@ -129,11 +129,8 @@ void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *dataset);
 
 VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
                                struct pvr_render_job *job,
-                               struct vk_sync *barrier_geom,
-                               struct vk_sync *barrier_frag,
-                               struct vk_sync **waits,
-                               uint32_t wait_count,
-                               uint32_t *stage_flags,
+                               struct vk_sync *wait_geom,
+                               struct vk_sync *wait_frag,
                                struct vk_sync *signal_sync_geom,
                                struct vk_sync *signal_sync_frag);
 
index c4d6c1b..a0c2e79 100644 (file)
 VkResult pvr_transfer_job_submit(struct pvr_device *device,
                                  struct pvr_transfer_ctx *ctx,
                                  struct pvr_sub_cmd_transfer *sub_cmd,
-                                 struct vk_sync *barrier,
-                                 struct vk_sync **waits,
-                                 uint32_t wait_count,
-                                 uint32_t *stage_flags,
+                                 struct vk_sync *wait_sync,
                                  struct vk_sync *signal_sync)
 {
-   /* Wait for transfer semaphores here before doing any transfers. */
-   for (uint32_t i = 0U; i < wait_count; i++) {
-      if (stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) {
-         VkResult result = vk_sync_wait(&device->vk,
-                                        waits[i],
-                                        0U,
-                                        VK_SYNC_WAIT_COMPLETE,
-                                        UINT64_MAX);
-         if (result != VK_SUCCESS)
-            return result;
+   VkResult result;
 
-         stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT;
-      }
-   }
-
-
-   if (barrier) {
-      VkResult result = vk_sync_wait(&device->vk,
-                                     barrier,
-                                     0U,
-                                     VK_SYNC_WAIT_COMPLETE,
-                                     UINT64_MAX);
-      if (result != VK_SUCCESS)
-         return result;
-   }
+   result = vk_sync_wait(&device->vk,
+                         wait_sync,
+                         0U,
+                         VK_SYNC_WAIT_COMPLETE,
+                         UINT64_MAX);
+   if (result != VK_SUCCESS)
+      return result;
 
    list_for_each_entry_safe (struct pvr_transfer_cmd,
                              transfer_cmd,
index 4550374..0084969 100644 (file)
@@ -35,10 +35,7 @@ struct vk_sync;
 VkResult pvr_transfer_job_submit(struct pvr_device *device,
                                  struct pvr_transfer_ctx *ctx,
                                  struct pvr_sub_cmd_transfer *sub_cmd,
-                                 struct vk_sync *barrier,
-                                 struct vk_sync **waits,
-                                 uint32_t wait_count,
-                                 uint32_t *stage_flags,
+                                 struct vk_sync *wait,
                                  struct vk_sync *signal_sync);
 
 #endif /* PVR_JOB_TRANSFER_H */
index 8dfbbae..abff000 100644 (file)
@@ -131,14 +131,8 @@ struct pvr_queue {
    struct pvr_compute_ctx *query_ctx;
    struct pvr_transfer_ctx *transfer_ctx;
 
-   struct vk_sync *completion[PVR_JOB_TYPE_MAX];
-
-   /* Used to setup a job dependency from jobs previously submitted, onto
-    * the next job per job type.
-    *
-    * Used to create dependencies for pipeline barriers.
-    */
-   struct vk_sync *job_dependancy[PVR_JOB_TYPE_MAX];
+   struct vk_sync *last_job_signal_sync[PVR_JOB_TYPE_MAX];
+   struct vk_sync *next_job_wait_sync[PVR_JOB_TYPE_MAX];
 };
 
 struct pvr_vertex_binding {
@@ -279,6 +273,8 @@ struct pvr_device {
    VkPhysicalDeviceFeatures features;
 
    struct pvr_bo_store *bo_store;
+
+   struct vk_sync *presignaled_sync;
 };
 
 struct pvr_device_memory {
index e6d69d2..00acb32 100644 (file)
@@ -54,6 +54,9 @@
 #include "vk_sync_dummy.h"
 #include "vk_util.h"
 
+static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
+                                        struct vk_queue_submit *submit);
+
 static VkResult pvr_queue_init(struct pvr_device *device,
                                struct pvr_queue *queue,
                                const VkDeviceQueueCreateInfo *pCreateInfo,
@@ -101,6 +104,8 @@ static VkResult pvr_queue_init(struct pvr_device *device,
    queue->query_ctx = query_ctx;
    queue->transfer_ctx = transfer_ctx;
 
+   queue->vk.driver_submit = pvr_driver_queue_submit;
+
    return VK_SUCCESS;
 
 err_query_ctx_destroy:
@@ -157,14 +162,14 @@ err_queues_finish:
 
 static void pvr_queue_finish(struct pvr_queue *queue)
 {
-   for (uint32_t i = 0; i < ARRAY_SIZE(queue->job_dependancy); i++) {
-      if (queue->job_dependancy[i])
-         vk_sync_destroy(&queue->device->vk, queue->job_dependancy[i]);
+   for (uint32_t i = 0; i < ARRAY_SIZE(queue->next_job_wait_sync); i++) {
+      if (queue->next_job_wait_sync[i])
+         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
    }
 
-   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
-      if (queue->completion[i])
-         vk_sync_destroy(&queue->device->vk, queue->completion[i]);
+   for (uint32_t i = 0; i < ARRAY_SIZE(queue->last_job_signal_sync); i++) {
+      if (queue->last_job_signal_sync[i])
+         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
    }
 
    pvr_render_ctx_destroy(queue->gfx_ctx);
@@ -183,228 +188,118 @@ void pvr_queues_destroy(struct pvr_device *device)
    vk_free(&device->vk.alloc, device->queues);
 }
 
-VkResult pvr_QueueWaitIdle(VkQueue _queue)
-{
-   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
-
-   for (int i = 0U; i < ARRAY_SIZE(queue->completion); i++) {
-      VkResult result;
-
-      if (!queue->completion[i])
-         continue;
-
-      result = vk_sync_wait(&queue->device->vk,
-                            queue->completion[i],
-                            0U,
-                            VK_SYNC_WAIT_COMPLETE,
-                            UINT64_MAX);
-      if (result != VK_SUCCESS)
-         return result;
-   }
-
-   return VK_SUCCESS;
-}
-
-static VkResult
-pvr_process_graphics_cmd_part(struct pvr_device *const device,
-                              struct pvr_render_ctx *const gfx_ctx,
-                              struct pvr_render_job *const job,
-                              struct vk_sync *const geom_barrier,
-                              struct vk_sync *const frag_barrier,
-                              struct vk_sync **const geom_completion,
-                              struct vk_sync **const frag_completion,
-                              struct vk_sync **const waits,
-                              const uint32_t wait_count,
-                              uint32_t *const stage_flags)
+static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
+                                         struct pvr_queue *queue,
+                                         struct pvr_cmd_buffer *cmd_buffer,
+                                         struct pvr_sub_cmd_gfx *sub_cmd)
 {
-   struct vk_sync *geom_sync = NULL;
-   struct vk_sync *frag_sync = NULL;
+   pvr_dev_addr_t original_ctrl_stream_addr = { 0 };
+   struct vk_sync *geom_signal_sync;
+   struct vk_sync *frag_signal_sync;
    VkResult result;
 
-   /* For each of geom and frag, a completion sync is optional but only allowed
-    * iff barrier is present.
-    */
-   assert(geom_barrier || !geom_completion);
-   assert(frag_barrier || !frag_completion);
-
-   if (geom_barrier) {
-      result = vk_sync_create(&device->vk,
-                              &device->pdevice->ws->syncobj_type,
-                              0U,
-                              0UL,
-                              &geom_sync);
-      if (result != VK_SUCCESS)
-         goto err_out;
-   }
-
-   if (frag_barrier) {
-      result = vk_sync_create(&device->vk,
-                              &device->pdevice->ws->syncobj_type,
-                              0U,
-                              0UL,
-                              &frag_sync);
-      if (result != VK_SUCCESS)
-         goto err_destroy_sync_geom;
-   }
+   result = vk_sync_create(&device->vk,
+                           &device->pdevice->ws->syncobj_type,
+                           0U,
+                           0UL,
+                           &geom_signal_sync);
+   if (result != VK_SUCCESS)
+      return result;
 
-   result = pvr_render_job_submit(gfx_ctx,
-                                  job,
-                                  geom_barrier,
-                                  frag_barrier,
-                                  waits,
-                                  wait_count,
-                                  stage_flags,
-                                  geom_sync,
-                                  frag_sync);
+   result = vk_sync_create(&device->vk,
+                           &device->pdevice->ws->syncobj_type,
+                           0U,
+                           0UL,
+                           &frag_signal_sync);
    if (result != VK_SUCCESS)
-      goto err_destroy_sync_frag;
+      goto err_destroy_geom_sync;
 
-   /* Replace the completion fences. */
-   if (geom_sync) {
-      if (*geom_completion)
-         vk_sync_destroy(&device->vk, *geom_completion);
+   /* FIXME: DoShadowLoadOrStore() */
 
-      *geom_completion = geom_sync;
-   }
+   /* Perform two render submits when using multiple framebuffer layers. The
+    * first submit contains just geometry, while the second only terminates
+    * (and triggers the fragment render if originally specified). This is needed
+    * because the render target cache gets cleared on terminating submits, which
+    * could result in missing primitives.
+    */
+   if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
+      /* If fragment work shouldn't be run there's no need for a split,
+       * and if geometry_terminate is false this kick can't have a fragment
+       * stage without another terminating geometry kick.
+       */
+      assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag);
 
-   if (frag_sync) {
-      if (*frag_completion)
-         vk_sync_destroy(&device->vk, *frag_completion);
+      /* First submit must not touch fragment work. */
+      sub_cmd->job.geometry_terminate = false;
+      sub_cmd->job.run_frag = false;
 
-      *frag_completion = frag_sync;
-   }
+      result =
+         pvr_render_job_submit(queue->gfx_ctx,
+                               &sub_cmd->job,
+                               queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
+                               NULL,
+                               NULL,
+                               NULL);
 
-   return VK_SUCCESS;
+      sub_cmd->job.geometry_terminate = true;
+      sub_cmd->job.run_frag = true;
 
-err_destroy_sync_frag:
-   if (frag_sync)
-      vk_sync_destroy(&device->vk, frag_sync);
+      if (result != VK_SUCCESS)
+         goto err_destroy_frag_sync;
 
-err_destroy_sync_geom:
-   if (geom_sync)
-      vk_sync_destroy(&device->vk, geom_sync);
+      original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr;
 
-err_out:
-   return result;
-}
+      /* Second submit contains only a trivial control stream to terminate the
+       * geometry work.
+       */
+      assert(sub_cmd->terminate_ctrl_stream);
+      sub_cmd->job.ctrl_stream_addr =
+         sub_cmd->terminate_ctrl_stream->vma->dev_addr;
+   }
 
-static VkResult
-pvr_process_split_graphics_cmd(struct pvr_device *const device,
-                               struct pvr_render_ctx *const gfx_ctx,
-                               struct pvr_sub_cmd_gfx *sub_cmd,
-                               struct vk_sync *const geom_barrier,
-                               struct vk_sync *const frag_barrier,
-                               struct vk_sync **const geom_completion,
-                               struct vk_sync **const frag_completion,
-                               struct vk_sync **const waits,
-                               const uint32_t wait_count,
-                               uint32_t *const stage_flags)
-{
-   struct pvr_render_job *const job = &sub_cmd->job;
-   const pvr_dev_addr_t original_ctrl_stream_addr = job->ctrl_stream_addr;
-   const bool original_geometry_terminate = job->geometry_terminate;
-   const bool original_run_frag = job->run_frag;
-   VkResult result;
+   result = pvr_render_job_submit(queue->gfx_ctx,
+                                  &sub_cmd->job,
+                                  queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
+                                  queue->next_job_wait_sync[PVR_JOB_TYPE_FRAG],
+                                  geom_signal_sync,
+                                  frag_signal_sync);
 
-   /* First submit must not touch fragment work. */
-   job->geometry_terminate = false;
-   job->run_frag = false;
+   if (original_ctrl_stream_addr.addr > 0)
+      sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr;
 
-   result = pvr_process_graphics_cmd_part(device,
-                                          gfx_ctx,
-                                          job,
-                                          geom_barrier,
-                                          NULL,
-                                          geom_completion,
-                                          NULL,
-                                          waits,
-                                          wait_count,
-                                          stage_flags);
+   if (result != VK_SUCCESS)
+      goto err_destroy_frag_sync;
 
-   job->geometry_terminate = original_geometry_terminate;
-   job->run_frag = original_run_frag;
+   /* Replace the completion fences. */
+   if (queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM]) {
+      vk_sync_destroy(&device->vk,
+                      queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM]);
+   }
 
-   if (result != VK_SUCCESS)
-      return result;
+   queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM] = geom_signal_sync;
 
-   /* Second submit contains only a trivial control stream to terminate the
-    * geometry work.
-    */
-   assert(sub_cmd->terminate_ctrl_stream);
-   job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;
-
-   result = pvr_process_graphics_cmd_part(device,
-                                          gfx_ctx,
-                                          job,
-                                          NULL,
-                                          frag_barrier,
-                                          NULL,
-                                          frag_completion,
-                                          waits,
-                                          wait_count,
-                                          stage_flags);
-
-   job->ctrl_stream_addr = original_ctrl_stream_addr;
+   if (queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG]) {
+      vk_sync_destroy(&device->vk,
+                      queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG]);
+   }
 
-   return result;
-}
+   queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG] = frag_signal_sync;
 
-static VkResult
-pvr_process_graphics_cmd(struct pvr_device *device,
-                         struct pvr_queue *queue,
-                         struct pvr_cmd_buffer *cmd_buffer,
-                         struct pvr_sub_cmd_gfx *sub_cmd,
-                         struct vk_sync *barrier_geom,
-                         struct vk_sync *barrier_frag,
-                         struct vk_sync **waits,
-                         uint32_t wait_count,
-                         uint32_t *stage_flags,
-                         struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
-{
    /* FIXME: DoShadowLoadOrStore() */
 
-   /* Perform two render submits when using multiple framebuffer layers. The
-    * first submit contains just geometry, while the second only terminates
-    * (and triggers the fragment render if originally specified). This is needed
-    * because the render target cache gets cleared on terminating submits, which
-    * could result in missing primitives.
-    */
-   if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
-      return pvr_process_split_graphics_cmd(device,
-                                            queue->gfx_ctx,
-                                            sub_cmd,
-                                            barrier_geom,
-                                            barrier_frag,
-                                            &completions[PVR_JOB_TYPE_GEOM],
-                                            &completions[PVR_JOB_TYPE_FRAG],
-                                            waits,
-                                            wait_count,
-                                            stage_flags);
-   }
+   return VK_SUCCESS;
 
-   return pvr_process_graphics_cmd_part(device,
-                                        queue->gfx_ctx,
-                                        &sub_cmd->job,
-                                        barrier_geom,
-                                        barrier_frag,
-                                        &completions[PVR_JOB_TYPE_GEOM],
-                                        &completions[PVR_JOB_TYPE_FRAG],
-                                        waits,
-                                        wait_count,
-                                        stage_flags);
+err_destroy_frag_sync:
+   vk_sync_destroy(&device->vk, frag_signal_sync);
+err_destroy_geom_sync:
+   vk_sync_destroy(&device->vk, geom_signal_sync);
 
-   /* FIXME: DoShadowLoadOrStore() */
+   return result;
 }
 
-static VkResult
-pvr_process_compute_cmd(struct pvr_device *device,
-                        struct pvr_queue *queue,
-                        struct pvr_sub_cmd_compute *sub_cmd,
-                        struct vk_sync *barrier,
-                        struct vk_sync **waits,
-                        uint32_t wait_count,
-                        uint32_t *stage_flags,
-                        struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
+static VkResult pvr_process_compute_cmd(struct pvr_device *device,
+                                        struct pvr_queue *queue,
+                                        struct pvr_sub_cmd_compute *sub_cmd)
 {
    struct vk_sync *sync;
    VkResult result;
@@ -417,36 +312,30 @@ pvr_process_compute_cmd(struct pvr_device *device,
    if (result != VK_SUCCESS)
       return result;
 
-   result = pvr_compute_job_submit(queue->compute_ctx,
-                                   sub_cmd,
-                                   barrier,
-                                   waits,
-                                   wait_count,
-                                   stage_flags,
-                                   sync);
+   result =
+      pvr_compute_job_submit(queue->compute_ctx,
+                             sub_cmd,
+                             queue->next_job_wait_sync[PVR_JOB_TYPE_COMPUTE],
+                             sync);
    if (result != VK_SUCCESS) {
       vk_sync_destroy(&device->vk, sync);
       return result;
    }
 
-   /* Replace the completion fences. */
-   if (completions[PVR_JOB_TYPE_COMPUTE])
-      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_COMPUTE]);
+   /* Replace the signal fence. */
+   if (queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE]) {
+      vk_sync_destroy(&device->vk,
+                      queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE]);
+   }
 
-   completions[PVR_JOB_TYPE_COMPUTE] = sync;
+   queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE] = sync;
 
    return result;
 }
 
-static VkResult
-pvr_process_transfer_cmds(struct pvr_device *device,
-                          struct pvr_queue *queue,
-                          struct pvr_sub_cmd_transfer *sub_cmd,
-                          struct vk_sync *barrier,
-                          struct vk_sync **waits,
-                          uint32_t wait_count,
-                          uint32_t *stage_flags,
-                          struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
+static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
+                                          struct pvr_queue *queue,
+                                          struct pvr_sub_cmd_transfer *sub_cmd)
 {
    struct vk_sync *sync;
    VkResult result;
@@ -459,37 +348,32 @@ pvr_process_transfer_cmds(struct pvr_device *device,
    if (result != VK_SUCCESS)
       return result;
 
-   result = pvr_transfer_job_submit(device,
-                                    queue->transfer_ctx,
-                                    sub_cmd,
-                                    barrier,
-                                    waits,
-                                    wait_count,
-                                    stage_flags,
-                                    sync);
+   result =
+      pvr_transfer_job_submit(device,
+                              queue->transfer_ctx,
+                              sub_cmd,
+                              queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
+                              sync);
    if (result != VK_SUCCESS) {
       vk_sync_destroy(&device->vk, sync);
       return result;
    }
 
-   /* Replace the completion fences. */
-   if (completions[PVR_JOB_TYPE_TRANSFER])
-      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_TRANSFER]);
+   /* Replace the signal syncs. */
+   if (queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER]) {
+      vk_sync_destroy(&device->vk,
+                      queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER]);
+   }
 
-   completions[PVR_JOB_TYPE_TRANSFER] = sync;
+   queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER] = sync;
 
    return result;
 }
 
-static VkResult pvr_process_occlusion_query_cmd(
-   struct pvr_device *device,
-   struct pvr_queue *queue,
-   struct pvr_sub_cmd_compute *sub_cmd,
-   struct vk_sync *barrier,
-   struct vk_sync **waits,
-   uint32_t wait_count,
-   uint32_t *stage_flags,
-   struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
+static VkResult
+pvr_process_occlusion_query_cmd(struct pvr_device *device,
+                                struct pvr_queue *queue,
+                                struct pvr_sub_cmd_compute *sub_cmd)
 {
    struct vk_sync *sync;
    VkResult result;
@@ -508,42 +392,35 @@ static VkResult pvr_process_occlusion_query_cmd(
    if (result != VK_SUCCESS)
       return result;
 
-   result = pvr_compute_job_submit(queue->query_ctx,
-                                   sub_cmd,
-                                   barrier,
-                                   waits,
-                                   wait_count,
-                                   stage_flags,
-                                   sync);
+   result = pvr_compute_job_submit(
+      queue->query_ctx,
+      sub_cmd,
+      queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY],
+      sync);
    if (result != VK_SUCCESS) {
       vk_sync_destroy(&device->vk, sync);
       return result;
    }
 
-   if (completions[PVR_JOB_TYPE_OCCLUSION_QUERY])
-      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_OCCLUSION_QUERY]);
+   if (queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY]) {
+      vk_sync_destroy(
+         &device->vk,
+         queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY]);
+   }
 
-   completions[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync;
+   queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync;
 
    return result;
 }
 
-static VkResult pvr_process_event_cmd_barrier(
-   struct pvr_device *device,
-   struct pvr_sub_cmd_event *sub_cmd,
-   struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
+static VkResult pvr_process_event_cmd_barrier(struct pvr_device *device,
+                                              struct pvr_queue *queue,
+                                              struct pvr_sub_cmd_event *sub_cmd)
 {
    const uint32_t src_mask = sub_cmd->barrier.wait_for_stage_mask;
    const uint32_t dst_mask = sub_cmd->barrier.wait_at_stage_mask;
-   const bool in_render_pass = sub_cmd->barrier.in_render_pass;
-   struct vk_sync *new_barriers[PVR_JOB_TYPE_MAX] = { 0 };
-   struct vk_sync *completions[PVR_JOB_TYPE_MAX] = { 0 };
-   struct vk_sync *src_syncobjs[PVR_JOB_TYPE_MAX];
-   uint32_t src_syncobj_count = 0;
+   struct vk_sync_wait wait_syncs[PVR_JOB_TYPE_MAX + 1];
+   uint32_t src_wait_count = 0;
    VkResult result;
 
    assert(sub_cmd->type == PVR_EVENT_TYPE_BARRIER);
@@ -551,154 +428,79 @@ static VkResult pvr_process_event_cmd_barrier(
    assert(!(src_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
    assert(!(dst_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
 
-   /* TODO: We're likely over synchronizing here, but the kernel doesn't
-    * guarantee that jobs submitted on a context will execute and complete in
-    * order, even though in practice they will, so we play it safe and don't
-    * make any assumptions. If the kernel starts to offer this guarantee then
-    * remove the extra dependencies being added here.
-    */
-
    u_foreach_bit (stage, src_mask) {
-      struct vk_sync *syncobj;
-
-      syncobj = per_cmd_buffer_syncobjs[stage];
-
-      if (!in_render_pass & !syncobj) {
-         if (per_submit_syncobjs[stage])
-            syncobj = per_submit_syncobjs[stage];
-         else if (queue_syncobjs[stage])
-            syncobj = queue_syncobjs[stage];
-         else if (previous_queue_syncobjs[stage])
-            syncobj = previous_queue_syncobjs[stage];
+      if (queue->last_job_signal_sync[stage]) {
+         wait_syncs[src_wait_count++] = (struct vk_sync_wait){
+            .sync = queue->last_job_signal_sync[stage],
+            .stage_mask = ~(VkPipelineStageFlags2)0,
+            .wait_value = 0,
+         };
       }
-
-      if (!syncobj)
-         continue;
-
-      src_syncobjs[src_syncobj_count++] = syncobj;
    }
 
    /* No previous src jobs that need finishing so no need for a barrier. */
-   if (src_syncobj_count == 0)
+   if (src_wait_count == 0)
       return VK_SUCCESS;
 
    u_foreach_bit (stage, dst_mask) {
-      struct vk_sync *completion;
+      uint32_t wait_count = src_wait_count;
+      struct vk_sync_signal signal;
+      struct vk_sync *signal_sync;
 
       result = vk_sync_create(&device->vk,
                               &device->pdevice->ws->syncobj_type,
                               0U,
                               0UL,
-                              &completion);
+                              &signal_sync);
       if (result != VK_SUCCESS)
-         goto err_destroy_completions;
-
-      result = device->ws->ops->null_job_submit(device->ws,
-                                                src_syncobjs,
-                                                src_syncobj_count,
-                                                completion);
-      if (result != VK_SUCCESS) {
-         vk_sync_destroy(&device->vk, completion);
+         return result;
 
-         goto err_destroy_completions;
+      signal = (struct vk_sync_signal){
+         .sync = signal_sync,
+         .stage_mask = ~(VkPipelineStageFlags2)0,
+         .signal_value = 0,
+      };
+
+      if (queue->next_job_wait_sync[stage]) {
+         wait_syncs[wait_count++] = (struct vk_sync_wait){
+            .sync = queue->next_job_wait_sync[stage],
+            .stage_mask = ~(VkPipelineStageFlags2)0,
+            .wait_value = 0,
+         };
       }
 
-      completions[stage] = completion;
-   }
-
-   u_foreach_bit (stage, dst_mask) {
-      struct vk_sync *barrier_src_syncobjs[2];
-      uint32_t barrier_src_syncobj_count = 0;
-      struct vk_sync *barrier;
-      VkResult result;
-
-      assert(completions[stage]);
-      barrier_src_syncobjs[barrier_src_syncobj_count++] = completions[stage];
-
-      /* If there is a previous barrier we want to merge it with the new one.
-       *
-       * E.g.
-       *    A <compute>, B <compute>,
-       *       X <barrier src=compute, dst=graphics>,
-       *    C <transfer>
-       *       Y <barrier src=transfer, dst=graphics>,
-       *    D <graphics>
-       *
-       * X barriers A and B at D. Y barriers C at D. So we want to merge both
-       * X and Y graphics vk_sync barriers to pass to D.
-       *
-       * Note that this is the same as:
-       *    A <compute>, B <compute>, C <transfer>
-       *       X <barrier src=compute, dst=graphics>,
-       *       Y <barrier src=transfer, dst=graphics>,
-       *    D <graphics>
-       *
-       */
-      if (barriers[stage])
-         barrier_src_syncobjs[barrier_src_syncobj_count++] = barriers[stage];
-
-      result = vk_sync_create(&device->vk,
-                              &device->pdevice->ws->syncobj_type,
-                              0U,
-                              0UL,
-                              &barrier);
-      if (result != VK_SUCCESS)
-         goto err_destroy_new_barriers;
-
       result = device->ws->ops->null_job_submit(device->ws,
-                                                barrier_src_syncobjs,
-                                                barrier_src_syncobj_count,
-                                                barrier);
+                                                wait_syncs,
+                                                wait_count,
+                                                &signal);
       if (result != VK_SUCCESS) {
-         vk_sync_destroy(&device->vk, barrier);
-
-         goto err_destroy_new_barriers;
+         vk_sync_destroy(&device->vk, signal_sync);
+         return result;
       }
 
-      new_barriers[stage] = barrier;
-   }
-
-   u_foreach_bit (stage, dst_mask) {
-      if (per_cmd_buffer_syncobjs[stage])
-         vk_sync_destroy(&device->vk, per_cmd_buffer_syncobjs[stage]);
-
-      per_cmd_buffer_syncobjs[stage] = completions[stage];
+      if (queue->next_job_wait_sync[stage])
+         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);
 
-      if (barriers[stage])
-         vk_sync_destroy(&device->vk, barriers[stage]);
-
-      barriers[stage] = new_barriers[stage];
+      queue->next_job_wait_sync[stage] = signal_sync;
    }
 
    return VK_SUCCESS;
-
-err_destroy_new_barriers:
-   u_foreach_bit (stage, dst_mask) {
-      if (new_barriers[stage])
-         vk_sync_destroy(&device->vk, new_barriers[stage]);
-   }
-
-err_destroy_completions:
-   u_foreach_bit (stage, dst_mask) {
-      if (completions[stage])
-         vk_sync_destroy(&device->vk, completions[stage]);
-   }
-
-   return result;
 }
 
-static VkResult pvr_process_event_cmd_set_or_reset(
-   struct pvr_device *device,
-   struct pvr_sub_cmd_event *sub_cmd,
-   struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX])
+static VkResult
+pvr_process_event_cmd_set_or_reset(struct pvr_device *device,
+                                   struct pvr_queue *queue,
+                                   struct pvr_sub_cmd_event *sub_cmd)
 {
    /* Not PVR_JOB_TYPE_MAX since that also includes
     * PVR_JOB_TYPE_OCCLUSION_QUERY so no stage in the src mask.
     */
-   struct vk_sync *src_syncobjs[PVR_NUM_SYNC_PIPELINE_STAGES];
-   struct vk_sync *new_event_syncobj;
-   uint32_t src_syncobj_count = 0;
+   struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES];
+   struct vk_sync_signal signal;
+   struct vk_sync *signal_sync;
+
    uint32_t wait_for_stage_mask;
+   uint32_t wait_count = 0;
    VkResult result;
 
    assert(sub_cmd->type == PVR_EVENT_TYPE_SET ||
@@ -712,29 +514,34 @@ static VkResult pvr_process_event_cmd_set_or_reset(
    assert(!(wait_for_stage_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
 
    u_foreach_bit (stage, wait_for_stage_mask) {
-      if (!per_cmd_buffer_syncobjs[stage])
+      if (!queue->last_job_signal_sync[stage])
          continue;
 
-      src_syncobjs[src_syncobj_count++] = per_cmd_buffer_syncobjs[stage];
+      waits[wait_count++] = (struct vk_sync_wait){
+         .sync = queue->last_job_signal_sync[stage],
+         .stage_mask = ~(VkPipelineStageFlags2)0,
+         .wait_value = 0,
+      };
    }
 
-   assert(src_syncobj_count <= ARRAY_SIZE(src_syncobjs));
-
    result = vk_sync_create(&device->vk,
                            &device->pdevice->ws->syncobj_type,
                            0U,
                            0UL,
-                           &new_event_syncobj);
+                           &signal_sync);
    if (result != VK_SUCCESS)
       return result;
 
-   result = device->ws->ops->null_job_submit(device->ws,
-                                             src_syncobjs,
-                                             src_syncobj_count,
-                                             new_event_syncobj);
-   if (result != VK_SUCCESS) {
-      vk_sync_destroy(&device->vk, new_event_syncobj);
+   signal = (struct vk_sync_signal){
+      .sync = signal_sync,
+      .stage_mask = ~(VkPipelineStageFlags2)0,
+      .signal_value = 0,
+   };
 
+   result =
+      device->ws->ops->null_job_submit(device->ws, waits, wait_count, &signal);
+   if (result != VK_SUCCESS) {
+      vk_sync_destroy(&device->vk, signal_sync);
       return result;
    }
 
@@ -742,13 +549,13 @@ static VkResult pvr_process_event_cmd_set_or_reset(
       if (sub_cmd->set.event->sync)
          vk_sync_destroy(&device->vk, sub_cmd->set.event->sync);
 
-      sub_cmd->set.event->sync = new_event_syncobj;
+      sub_cmd->set.event->sync = signal_sync;
       sub_cmd->set.event->state = PVR_EVENT_STATE_SET_BY_DEVICE;
    } else {
       if (sub_cmd->reset.event->sync)
          vk_sync_destroy(&device->vk, sub_cmd->reset.event->sync);
 
-      sub_cmd->reset.event->sync = new_event_syncobj;
+      sub_cmd->reset.event->sync = signal_sync;
       sub_cmd->reset.event->state = PVR_EVENT_STATE_RESET_BY_DEVICE;
    }
 
@@ -771,272 +578,110 @@ static VkResult pvr_process_event_cmd_set_or_reset(
  * \param[in,out] per_cmd_buffer_syncobjs  Completion syncobjs for the command
  *                                         buffer being processed.
  */
-static VkResult pvr_process_event_cmd_wait(
-   struct pvr_device *device,
-   struct pvr_sub_cmd_event *sub_cmd,
-   struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX])
+static VkResult pvr_process_event_cmd_wait(struct pvr_device *device,
+                                           struct pvr_queue *queue,
+                                           struct pvr_sub_cmd_event *sub_cmd)
 {
-   /* +1 if there's a previous barrier which we need to merge. */
-   struct vk_sync *new_barriers[PVR_JOB_TYPE_MAX];
-   struct vk_sync *completions[PVR_JOB_TYPE_MAX];
    uint32_t dst_mask = 0;
+   VkResult result;
 
-   STACK_ARRAY(struct vk_sync *, src_syncobjs, sub_cmd->wait.count + 1);
-   if (!src_syncobjs)
+   STACK_ARRAY(struct vk_sync_wait, waits, sub_cmd->wait.count + 1);
+   if (!waits)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
    for (uint32_t i = 0; i < sub_cmd->wait.count; i++)
       dst_mask |= sub_cmd->wait.wait_at_stage_masks[i];
 
    u_foreach_bit (stage, dst_mask) {
-      uint32_t src_syncobj_count = 0;
-      struct vk_sync *completion;
-      struct vk_sync *barrier;
-      VkResult result;
-
-      if (barriers[stage])
-         src_syncobjs[src_syncobj_count++] = barriers[stage];
+      struct vk_sync_signal signal;
+      struct vk_sync *signal_sync;
+      uint32_t wait_count = 0;
 
       for (uint32_t i = 0; i < sub_cmd->wait.count; i++) {
-         if (sub_cmd->wait.wait_at_stage_masks[i] & stage)
-            src_syncobjs[src_syncobj_count++] = sub_cmd->wait.events[i]->sync;
+         if (sub_cmd->wait.wait_at_stage_masks[i] & stage) {
+            waits[wait_count++] = (struct vk_sync_wait){
+               .sync = sub_cmd->wait.events[i]->sync,
+               .stage_mask = ~(VkPipelineStageFlags2)0,
+               .wait_value = 0,
+            };
+         }
       }
 
-      /* Create completion. */
+      if (!wait_count)
+         continue;
 
-      result = vk_sync_create(&device->vk,
-                              &device->pdevice->ws->syncobj_type,
-                              0U,
-                              0UL,
-                              &completion);
-      if (result != VK_SUCCESS) {
-         STACK_ARRAY_FINISH(src_syncobjs);
-         return result;
+      if (queue->next_job_wait_sync[stage]) {
+         waits[wait_count++] = (struct vk_sync_wait){
+            .sync = queue->next_job_wait_sync[stage],
+            .stage_mask = ~(VkPipelineStageFlags2)0,
+            .wait_value = 0,
+         };
       }
 
-      result = device->ws->ops->null_job_submit(device->ws,
-                                                src_syncobjs,
-                                                src_syncobj_count,
-                                                completion);
-      if (result != VK_SUCCESS) {
-         vk_sync_destroy(&device->vk, completion);
-         STACK_ARRAY_FINISH(src_syncobjs);
-         return result;
-      }
-
-      completions[stage] = completion;
-
-      /* Create barrier. */
-
-      /* We can't reuse the completion as a barrier since a barrier can be
-       * passed into multiple job submissions based on the dst mask while the
-       * completion gets replaced on each job submission so we'd end up in a
-       * case where the completion is replaced but other job submissions (of
-       * different type, i.e. different stages in the dst mask) get fed the
-       * freed barrier resulting in a use after free.
-       */
+      assert(wait_count <= (sub_cmd->wait.count + 1));
 
       result = vk_sync_create(&device->vk,
                               &device->pdevice->ws->syncobj_type,
                               0U,
                               0UL,
-                              &barrier);
-      if (result != VK_SUCCESS) {
-         vk_sync_destroy(&device->vk, completion);
-         STACK_ARRAY_FINISH(src_syncobjs);
-         return result;
-      }
+                              &signal_sync);
+      if (result != VK_SUCCESS)
+         goto err_free_waits;
 
-      result =
-         device->ws->ops->null_job_submit(device->ws, &completion, 1, barrier);
+      signal = (struct vk_sync_signal){
+         .sync = signal_sync,
+         .stage_mask = ~(VkPipelineStageFlags2)0,
+         .signal_value = 0,
+      };
+
+      result = device->ws->ops->null_job_submit(device->ws,
+                                                waits,
+                                                wait_count,
+                                                &signal);
       if (result != VK_SUCCESS) {
-         vk_sync_destroy(&device->vk, barrier);
-         vk_sync_destroy(&device->vk, completion);
-         STACK_ARRAY_FINISH(src_syncobjs);
-         return result;
+         vk_sync_destroy(&device->vk, signal.sync);
+         goto err_free_waits;
       }
 
-      new_barriers[stage] = barrier;
-   }
-
-   u_foreach_bit (stage, dst_mask) {
-      if (per_cmd_buffer_syncobjs[stage])
-         vk_sync_destroy(&device->vk, per_cmd_buffer_syncobjs[stage]);
-
-      per_cmd_buffer_syncobjs[stage] = completions[stage];
-
-      if (barriers[stage])
-         vk_sync_destroy(&device->vk, barriers[stage]);
+      if (queue->next_job_wait_sync[stage])
+         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);
 
-      barriers[stage] = new_barriers[stage];
+      queue->next_job_wait_sync[stage] = signal.sync;
    }
 
-   STACK_ARRAY_FINISH(src_syncobjs);
+   STACK_ARRAY_FINISH(waits);
 
    return VK_SUCCESS;
+
+err_free_waits:
+   STACK_ARRAY_FINISH(waits);
+
+   return result;
 }
 
-static VkResult pvr_process_event_cmd(
-   struct pvr_device *device,
-   struct pvr_sub_cmd_event *sub_cmd,
-   struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
+static VkResult pvr_process_event_cmd(struct pvr_device *device,
+                                      struct pvr_queue *queue,
+                                      struct pvr_sub_cmd_event *sub_cmd)
 {
    switch (sub_cmd->type) {
    case PVR_EVENT_TYPE_SET:
    case PVR_EVENT_TYPE_RESET:
-      return pvr_process_event_cmd_set_or_reset(device,
-                                                sub_cmd,
-                                                per_cmd_buffer_syncobjs);
-
+      return pvr_process_event_cmd_set_or_reset(device, queue, sub_cmd);
    case PVR_EVENT_TYPE_WAIT:
-      return pvr_process_event_cmd_wait(device,
-                                        sub_cmd,
-                                        barriers,
-                                        per_cmd_buffer_syncobjs);
-
+      return pvr_process_event_cmd_wait(device, queue, sub_cmd);
    case PVR_EVENT_TYPE_BARRIER:
-      return pvr_process_event_cmd_barrier(device,
-                                           sub_cmd,
-                                           barriers,
-                                           per_cmd_buffer_syncobjs,
-                                           per_submit_syncobjs,
-                                           queue_syncobjs,
-                                           previous_queue_syncobjs);
-
+      return pvr_process_event_cmd_barrier(device, queue, sub_cmd);
    default:
       unreachable("Invalid event sub-command type.");
    };
 }
 
-static VkResult
-pvr_set_semaphore_payloads(struct pvr_device *device,
-                           struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
-                           const VkSemaphore *signals,
-                           uint32_t signal_count)
-{
-   struct vk_sync *sync;
-   VkResult result;
-   int fd = -1;
-
-   result = vk_sync_create(&device->vk,
-                           &device->pdevice->ws->syncobj_type,
-                           0U,
-                           0UL,
-                           &sync);
-   if (result != VK_SUCCESS)
-      return result;
-
-   result = device->ws->ops->null_job_submit(device->ws,
-                                             completions,
-                                             PVR_JOB_TYPE_MAX,
-                                             sync);
-   if (result != VK_SUCCESS)
-      goto end_set_semaphore_payloads;
-
-   /* If we have a single signal semaphore, we can simply move merged sync's
-    * payload to the signal semahpore's payload.
-    */
-   if (signal_count == 1U) {
-      VK_FROM_HANDLE(vk_semaphore, sem, signals[0]);
-      struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);
-
-      result = vk_sync_move(&device->vk, sem_sync, sync);
-      goto end_set_semaphore_payloads;
-   }
-
-   result = vk_sync_export_sync_file(&device->vk, sync, &fd);
-   if (result != VK_SUCCESS)
-      goto end_set_semaphore_payloads;
-
-   for (uint32_t i = 0U; i < signal_count; i++) {
-      VK_FROM_HANDLE(vk_semaphore, sem, signals[i]);
-      struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);
-
-      result = vk_sync_import_sync_file(&device->vk, sem_sync, fd);
-      if (result != VK_SUCCESS)
-         goto end_set_semaphore_payloads;
-   }
-
-end_set_semaphore_payloads:
-   if (fd != -1)
-      close(fd);
-
-   vk_sync_destroy(&device->vk, sync);
-
-   return result;
-}
-
-static VkResult
-pvr_set_fence_payload(struct pvr_device *device,
-                      struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
-                      VkFence _fence)
-{
-   VK_FROM_HANDLE(vk_fence, fence, _fence);
-   struct vk_sync *fence_sync;
-   struct vk_sync *sync;
-   VkResult result;
-
-   result = vk_sync_create(&device->vk,
-                           &device->pdevice->ws->syncobj_type,
-                           0U,
-                           0UL,
-                           &sync);
-   if (result != VK_SUCCESS)
-      return result;
-
-   result = device->ws->ops->null_job_submit(device->ws,
-                                             completions,
-                                             PVR_JOB_TYPE_MAX,
-                                             sync);
-   if (result != VK_SUCCESS) {
-      vk_sync_destroy(&device->vk, sync);
-      return result;
-   }
-
-   fence_sync = vk_fence_get_active_sync(fence);
-   result = vk_sync_move(&device->vk, fence_sync, sync);
-   vk_sync_destroy(&device->vk, sync);
-
-   return result;
-}
-
-static void pvr_update_syncobjs(struct pvr_device *device,
-                                struct vk_sync *src[static PVR_JOB_TYPE_MAX],
-                                struct vk_sync *dst[static PVR_JOB_TYPE_MAX])
-{
-   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
-      if (src[i]) {
-         if (dst[i])
-            vk_sync_destroy(&device->vk, dst[i]);
-
-         dst[i] = src[i];
-      }
-   }
-}
-
-static VkResult pvr_process_cmd_buffer(
-   struct pvr_device *device,
-   struct pvr_queue *queue,
-   VkCommandBuffer commandBuffer,
-   struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
-   struct vk_sync **waits,
-   uint32_t wait_count,
-   uint32_t *stage_flags,
-   struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
-   struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
+static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
+                                       struct pvr_queue *queue,
+                                       struct pvr_cmd_buffer *cmd_buffer)
 {
-   struct vk_sync *per_cmd_buffer_syncobjs[PVR_JOB_TYPE_MAX] = {};
-   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
    VkResult result;
 
-   assert(cmd_buffer->vk.state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE);
-
    list_for_each_entry_safe (struct pvr_sub_cmd,
                              sub_cmd,
                              &cmd_buffer->sub_cmds,
@@ -1058,38 +703,19 @@ static VkResult pvr_process_cmd_buffer(
              */
 
             result = pvr_process_event_cmd_barrier(device,
-                                                   &frag_to_transfer_barrier,
-                                                   barriers,
-                                                   per_cmd_buffer_syncobjs,
-                                                   per_submit_syncobjs,
-                                                   queue_syncobjs,
-                                                   previous_queue_syncobjs);
+                                                   queue,
+                                                   &frag_to_transfer_barrier);
             if (result != VK_SUCCESS)
                break;
          }
 
-         result = pvr_process_graphics_cmd(device,
-                                           queue,
-                                           cmd_buffer,
-                                           &sub_cmd->gfx,
-                                           barriers[PVR_JOB_TYPE_GEOM],
-                                           barriers[PVR_JOB_TYPE_FRAG],
-                                           waits,
-                                           wait_count,
-                                           stage_flags,
-                                           per_cmd_buffer_syncobjs);
+         result =
+            pvr_process_graphics_cmd(device, queue, cmd_buffer, &sub_cmd->gfx);
          break;
       }
 
       case PVR_SUB_CMD_TYPE_COMPUTE:
-         result = pvr_process_compute_cmd(device,
-                                          queue,
-                                          &sub_cmd->compute,
-                                          barriers[PVR_JOB_TYPE_COMPUTE],
-                                          waits,
-                                          wait_count,
-                                          stage_flags,
-                                          per_cmd_buffer_syncobjs);
+         result = pvr_process_compute_cmd(device, queue, &sub_cmd->compute);
          break;
 
       case PVR_SUB_CMD_TYPE_TRANSFER: {
@@ -1105,24 +731,13 @@ static VkResult pvr_process_cmd_buffer(
             };
 
             result = pvr_process_event_cmd_barrier(device,
-                                                   &frag_to_transfer_barrier,
-                                                   barriers,
-                                                   per_cmd_buffer_syncobjs,
-                                                   per_submit_syncobjs,
-                                                   queue_syncobjs,
-                                                   previous_queue_syncobjs);
+                                                   queue,
+                                                   &frag_to_transfer_barrier);
             if (result != VK_SUCCESS)
                break;
          }
 
-         result = pvr_process_transfer_cmds(device,
-                                            queue,
-                                            &sub_cmd->transfer,
-                                            barriers[PVR_JOB_TYPE_TRANSFER],
-                                            waits,
-                                            wait_count,
-                                            stage_flags,
-                                            per_cmd_buffer_syncobjs);
+         result = pvr_process_transfer_cmds(device, queue, &sub_cmd->transfer);
 
          if (serialize_with_frag) {
             struct pvr_sub_cmd_event transfer_to_frag_barrier = {
@@ -1137,37 +752,20 @@ static VkResult pvr_process_cmd_buffer(
                break;
 
             result = pvr_process_event_cmd_barrier(device,
-                                                   &transfer_to_frag_barrier,
-                                                   barriers,
-                                                   per_cmd_buffer_syncobjs,
-                                                   per_submit_syncobjs,
-                                                   queue_syncobjs,
-                                                   previous_queue_syncobjs);
+                                                   queue,
+                                                   &transfer_to_frag_barrier);
          }
 
          break;
       }
 
       case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
-         result = pvr_process_occlusion_query_cmd(
-            device,
-            queue,
-            &sub_cmd->compute,
-            barriers[PVR_JOB_TYPE_OCCLUSION_QUERY],
-            waits,
-            wait_count,
-            stage_flags,
-            per_cmd_buffer_syncobjs);
+         result =
+            pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute);
          break;
 
       case PVR_SUB_CMD_TYPE_EVENT:
-         result = pvr_process_event_cmd(device,
-                                        &sub_cmd->event,
-                                        barriers,
-                                        per_cmd_buffer_syncobjs,
-                                        per_submit_syncobjs,
-                                        queue_syncobjs,
-                                        previous_queue_syncobjs);
+         result = pvr_process_event_cmd(device, queue, &sub_cmd->event);
          break;
 
       default:
@@ -1181,144 +779,182 @@ static VkResult pvr_process_cmd_buffer(
       p_atomic_inc(&device->global_cmd_buffer_submit_count);
    }
 
-   pvr_update_syncobjs(device, per_cmd_buffer_syncobjs, per_submit_syncobjs);
-
    return VK_SUCCESS;
 }
 
-static VkResult
-pvr_submit_null_job(struct pvr_device *device,
-                    struct vk_sync **waits,
-                    uint32_t wait_count,
-                    uint32_t *stage_flags,
-                    struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
+static VkResult pvr_clear_last_submits_syncs(struct pvr_queue *queue)
 {
+   struct vk_sync_wait waits[PVR_JOB_TYPE_MAX * 2];
+   uint32_t wait_count = 0;
    VkResult result;
 
-   STATIC_ASSERT(PVR_JOB_TYPE_MAX >= PVR_NUM_SYNC_PIPELINE_STAGES);
-   for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
-      struct vk_sync *per_job_waits[wait_count];
-      uint32_t per_job_waits_count = 0;
+   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+      if (queue->next_job_wait_sync[i]) {
+         waits[wait_count++] = (struct vk_sync_wait){
+            .sync = queue->next_job_wait_sync[i],
+            .stage_mask = ~(VkPipelineStageFlags2)0,
+            .wait_value = 0,
+         };
+      }
 
-      /* Get the waits specific to the job type. */
-      for (uint32_t j = 0U; j < wait_count; j++) {
-         if (stage_flags[j] & (1U << i)) {
-            per_job_waits[per_job_waits_count] = waits[j];
-            per_job_waits_count++;
-         }
+      if (queue->last_job_signal_sync[i]) {
+         waits[wait_count++] = (struct vk_sync_wait){
+            .sync = queue->last_job_signal_sync[i],
+            .stage_mask = ~(VkPipelineStageFlags2)0,
+            .wait_value = 0,
+         };
       }
+   }
 
-      if (per_job_waits_count == 0U)
-         continue;
+   result = vk_sync_wait_many(&queue->device->vk,
+                              wait_count,
+                              waits,
+                              VK_SYNC_WAIT_COMPLETE,
+                              UINT64_MAX);
 
-      result = vk_sync_create(&device->vk,
-                              &device->pdevice->ws->syncobj_type,
-                              0U,
-                              0UL,
-                              &completions[i]);
-      if (result != VK_SUCCESS)
-         goto err_destroy_completion_syncs;
+   if (result != VK_SUCCESS)
+      return vk_error(queue, result);
 
-      result = device->ws->ops->null_job_submit(device->ws,
-                                                per_job_waits,
-                                                per_job_waits_count,
-                                                completions[i]);
-      if (result != VK_SUCCESS)
-         goto err_destroy_completion_syncs;
+   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+      if (queue->next_job_wait_sync[i]) {
+         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
+         queue->next_job_wait_sync[i] = NULL;
+      }
+
+      if (queue->last_job_signal_sync[i]) {
+         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
+         queue->last_job_signal_sync[i] = NULL;
+      }
    }
 
    return VK_SUCCESS;
+}
 
-err_destroy_completion_syncs:
-   for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
-      if (completions[i]) {
-         vk_sync_destroy(&device->vk, completions[i]);
-         completions[i] = NULL;
+static VkResult pvr_process_queue_signals(struct pvr_queue *queue,
+                                          struct vk_sync_signal *signals,
+                                          uint32_t signal_count)
+{
+   struct vk_sync_wait signal_waits[PVR_JOB_TYPE_MAX];
+   struct pvr_device *device = queue->device;
+   VkResult result;
+
+   for (uint32_t signal_idx = 0; signal_idx < signal_count; signal_idx++) {
+      struct vk_sync_signal *signal = &signals[signal_idx];
+      const enum pvr_pipeline_stage_bits signal_stage_src =
+         pvr_stage_mask_src(signal->stage_mask);
+      uint32_t wait_count = 0;
+
+      for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+         if (!(signal_stage_src & BITFIELD_BIT(i)) ||
+             !queue->last_job_signal_sync[i])
+            continue;
+
+         signal_waits[wait_count++] = (struct vk_sync_wait){
+            .sync = queue->last_job_signal_sync[i],
+            .stage_mask = ~(VkPipelineStageFlags2)0,
+            .wait_value = 0,
+         };
       }
+
+      result = device->ws->ops->null_job_submit(device->ws,
+                                                signal_waits,
+                                                wait_count,
+                                                signal);
+      if (result != VK_SUCCESS)
+         return result;
    }
 
-   return result;
+   return VK_SUCCESS;
 }
 
-VkResult pvr_QueueSubmit(VkQueue _queue,
-                         uint32_t submitCount,
-                         const VkSubmitInfo *pSubmits,
-                         VkFence fence)
+static VkResult pvr_process_queue_waits(struct pvr_queue *queue,
+                                        struct vk_sync_wait *waits,
+                                        uint32_t wait_count)
 {
-   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
-   struct vk_sync *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
    struct pvr_device *device = queue->device;
    VkResult result;
 
-   for (uint32_t i = 0U; i < submitCount; i++) {
-      struct vk_sync *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
-      const VkSubmitInfo *desc = &pSubmits[i];
-      struct vk_sync *waits[desc->waitSemaphoreCount];
-      uint32_t stage_flags[desc->waitSemaphoreCount];
-      uint32_t wait_count = 0;
+   STACK_ARRAY(struct vk_sync_wait, stage_waits, wait_count);
+   if (!stage_waits)
+      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-      for (uint32_t j = 0U; j < desc->waitSemaphoreCount; j++) {
-         VK_FROM_HANDLE(vk_semaphore, semaphore, desc->pWaitSemaphores[j]);
-         struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);
+   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+      struct vk_sync_signal next_job_wait_signal_sync;
+      uint32_t stage_wait_count = 0;
 
-         if (sync->type == &vk_sync_dummy_type)
+      for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) {
+         if (!(pvr_stage_mask(waits[wait_idx].stage_mask) & BITFIELD_BIT(i)))
             continue;
 
-         /* We don't currently support timeline semaphores. */
-         assert(!(sync->flags & VK_SYNC_IS_TIMELINE));
-
-         stage_flags[wait_count] =
-            pvr_stage_mask_dst(desc->pWaitDstStageMask[j]);
-         waits[wait_count] = vk_semaphore_get_active_sync(semaphore);
-         wait_count++;
+         stage_waits[stage_wait_count++] = (struct vk_sync_wait){
+            .sync = waits[wait_idx].sync,
+            .stage_mask = ~(VkPipelineStageFlags2)0,
+            .wait_value = waits[wait_idx].wait_value,
+         };
       }
 
-      if (desc->commandBufferCount > 0U) {
-         for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
-            result = pvr_process_cmd_buffer(device,
-                                            queue,
-                                            desc->pCommandBuffers[j],
-                                            queue->job_dependancy,
-                                            waits,
-                                            wait_count,
-                                            stage_flags,
-                                            per_submit_completion_syncobjs,
-                                            completion_syncobjs,
-                                            queue->completion);
-            if (result != VK_SUCCESS)
-               return result;
-         }
-      } else {
-         result = pvr_submit_null_job(device,
-                                      waits,
-                                      wait_count,
-                                      stage_flags,
-                                      per_submit_completion_syncobjs);
-         if (result != VK_SUCCESS)
-            return result;
-      }
+      result = vk_sync_create(&device->vk,
+                              &device->pdevice->ws->syncobj_type,
+                              0U,
+                              0UL,
+                              &queue->next_job_wait_sync[i]);
+      if (result != VK_SUCCESS)
+         goto err_free_waits;
 
-      if (desc->signalSemaphoreCount) {
-         result = pvr_set_semaphore_payloads(device,
-                                             per_submit_completion_syncobjs,
-                                             desc->pSignalSemaphores,
-                                             desc->signalSemaphoreCount);
-         if (result != VK_SUCCESS)
-            return result;
-      }
+      next_job_wait_signal_sync = (struct vk_sync_signal){
+         .sync = queue->next_job_wait_sync[i],
+         .stage_mask = ~(VkPipelineStageFlags2)0,
+         .signal_value = 0,
+      };
 
-      pvr_update_syncobjs(device,
-                          per_submit_completion_syncobjs,
-                          completion_syncobjs);
+      result = device->ws->ops->null_job_submit(device->ws,
+                                                stage_waits,
+                                                stage_wait_count,
+                                                &next_job_wait_signal_sync);
+      if (result != VK_SUCCESS)
+         goto err_free_waits;
    }
 
-   if (fence) {
-      result = pvr_set_fence_payload(device, completion_syncobjs, fence);
+   STACK_ARRAY_FINISH(stage_waits);
+
+   return VK_SUCCESS;
+
+err_free_waits:
+   STACK_ARRAY_FINISH(stage_waits);
+
+   return result;
+}
+
+static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
+                                        struct vk_queue_submit *submit)
+{
+   struct pvr_queue *driver_queue = container_of(queue, struct pvr_queue, vk);
+   struct pvr_device *device = driver_queue->device;
+   VkResult result;
+
+   result = pvr_clear_last_submits_syncs(driver_queue);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result =
+      pvr_process_queue_waits(driver_queue, submit->waits, submit->wait_count);
+   if (result != VK_SUCCESS)
+      return result;
+
+   for (uint32_t i = 0U; i < submit->command_buffer_count; i++) {
+      result = pvr_process_cmd_buffer(
+         device,
+         driver_queue,
+         container_of(submit->command_buffers[i], struct pvr_cmd_buffer, vk));
       if (result != VK_SUCCESS)
          return result;
    }
 
-   pvr_update_syncobjs(device, completion_syncobjs, queue->completion);
+   result = pvr_process_queue_signals(driver_queue,
+                                      submit->signals,
+                                      submit->signal_count);
+   if (result != VK_SUCCESS)
+      return result;
 
    return VK_SUCCESS;
 }
index 2db610a..973794f 100644 (file)
@@ -292,12 +292,7 @@ struct pvr_winsys_transfer_submit_info {
    uint32_t frame_num;
    uint32_t job_num;
 
-   struct vk_sync *barrier;
-
-   /* waits and stage_flags are arrays of length wait_count. */
-   struct vk_sync **waits;
-   uint32_t wait_count;
-   uint32_t *stage_flags;
+   struct vk_sync *wait;
 
    uint32_t cmd_count;
    struct pvr_winsys_transfer_cmd cmds[PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT];
@@ -310,12 +305,7 @@ struct pvr_winsys_compute_submit_info {
    uint32_t frame_num;
    uint32_t job_num;
 
-   struct vk_sync *barrier;
-
-   /* waits and stage_flags are arrays of length wait_count. */
-   struct vk_sync **waits;
-   uint32_t wait_count;
-   uint32_t *stage_flags;
+   struct vk_sync *wait;
 
    /* Firmware stream buffer. This is the maximum possible size taking into
     * consideration all HW features.
@@ -354,14 +344,6 @@ struct pvr_winsys_render_submit_info {
    /* FIXME: should this be flags instead? */
    bool run_frag;
 
-   struct vk_sync *barrier_geom;
-   struct vk_sync *barrier_frag;
-
-   /* waits and stage_flags are arrays of length wait_count. */
-   struct vk_sync **waits;
-   uint32_t wait_count;
-   uint32_t *stage_flags;
-
    struct pvr_winsys_geometry_state {
       /* Firmware stream buffer. This is the maximum possible size taking into
        * consideration all HW features.
@@ -377,6 +359,8 @@ struct pvr_winsys_render_submit_info {
 
       /* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */
       uint32_t flags;
+
+      struct vk_sync *wait;
    } geometry;
 
    struct pvr_winsys_fragment_state {
@@ -394,6 +378,8 @@ struct pvr_winsys_render_submit_info {
 
       /* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. */
       uint32_t flags;
+
+      struct vk_sync *wait;
    } fragment;
 };
 
@@ -485,9 +471,9 @@ struct pvr_winsys_ops {
       struct vk_sync *signal_sync);
 
    VkResult (*null_job_submit)(struct pvr_winsys *ws,
-                               struct vk_sync **waits,
+                               struct vk_sync_wait *waits,
                                uint32_t wait_count,
-                               struct vk_sync *signal_sync);
+                               struct vk_sync_signal *signal_sync);
 };
 
 struct pvr_winsys {
index 562dbce..56d3aa8 100644 (file)
@@ -33,6 +33,7 @@
 #include "pvr_srv.h"
 #include "pvr_srv_bo.h"
 #include "pvr_srv_bridge.h"
+#include "pvr_srv_job_common.h"
 #include "pvr_srv_job_compute.h"
 #include "pvr_srv_job_render.h"
 #include "pvr_srv_job_transfer.h"
@@ -46,6 +47,7 @@
 #include "util/macros.h"
 #include "util/os_misc.h"
 #include "vk_log.h"
+#include "vk_sync.h"
 
 /* Amount of space used to hold sync prim values (in bytes). */
 #define PVR_SRV_SYNC_PRIM_VALUE_SIZE 4U
@@ -392,6 +394,11 @@ static void pvr_srv_winsys_destroy(struct pvr_winsys *ws)
    struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
    int fd = srv_ws->render_fd;
 
+   if (srv_ws->presignaled_sync) {
+      vk_sync_destroy(&srv_ws->presignaled_sync_device->vk,
+                      &srv_ws->presignaled_sync->base);
+   }
+
    pvr_srv_sync_prim_block_finish(srv_ws);
    pvr_srv_memctx_finish(srv_ws);
    vk_free(srv_ws->alloc, srv_ws);
@@ -741,3 +748,83 @@ void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim)
       vk_free(srv_ws->alloc, sync_prim);
    }
 }
+
+static VkResult pvr_srv_create_presignaled_sync(struct pvr_device *device,
+                                                struct pvr_srv_sync **out_sync)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(device->ws);
+   struct vk_sync *sync;
+
+   int timeline_fd;
+   int sync_fd;
+
+   VkResult result;
+
+   result = pvr_srv_create_timeline(srv_ws->render_fd, &timeline_fd);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_srv_set_timeline_sw_only(timeline_fd);
+   if (result != VK_SUCCESS)
+      goto err_close_timeline;
+
+   result = pvr_srv_create_sw_fence(timeline_fd, &sync_fd, NULL);
+   if (result != VK_SUCCESS)
+      goto err_close_timeline;
+
+   result = pvr_srv_sw_sync_timeline_increment(timeline_fd, NULL);
+   if (result != VK_SUCCESS)
+      goto err_close_sw_fence;
+
+   result = vk_sync_create(&device->vk,
+                           &device->pdevice->ws->syncobj_type,
+                           0U,
+                           0UL,
+                           &sync);
+   if (result != VK_SUCCESS)
+      goto err_close_sw_fence;
+
+   result = vk_sync_import_sync_file(&device->vk, sync, sync_fd);
+   if (result != VK_SUCCESS)
+      goto err_destroy_sync;
+
+   *out_sync = to_srv_sync(sync);
+   (*out_sync)->signaled = true;
+
+   close(timeline_fd);
+
+   return VK_SUCCESS;
+
+err_destroy_sync:
+   vk_sync_destroy(&device->vk, sync);
+
+err_close_sw_fence:
+   close(sync_fd);
+
+err_close_timeline:
+   close(timeline_fd);
+
+   return result;
+}
+
+VkResult pvr_srv_sync_get_presignaled_sync(struct pvr_device *device,
+                                           struct pvr_srv_sync **out_sync)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(device->ws);
+   VkResult result;
+
+   if (!srv_ws->presignaled_sync) {
+      result =
+         pvr_srv_create_presignaled_sync(device, &srv_ws->presignaled_sync);
+      if (result != VK_SUCCESS)
+         return result;
+
+      srv_ws->presignaled_sync_device = device;
+   }
+
+   assert(device == srv_ws->presignaled_sync_device);
+
+   *out_sync = srv_ws->presignaled_sync;
+
+   return VK_SUCCESS;
+}
index 2f9aae8..f6127d6 100644 (file)
@@ -28,6 +28,7 @@
 #include <pthread.h>
 #include <vulkan/vulkan.h>
 
+#include "pvr_srv_sync.h"
 #include "pvr_winsys.h"
 #include "util/macros.h"
 #include "util/vma.h"
@@ -71,6 +72,9 @@ struct pvr_srv_winsys {
    int master_fd;
    int render_fd;
 
+   struct pvr_device *presignaled_sync_device;
+   struct pvr_srv_sync *presignaled_sync;
+
    const VkAllocationCallbacks *alloc;
 
    /* Packed bvnc */
@@ -130,4 +134,7 @@ pvr_srv_sync_prim_get_fw_addr(const struct pvr_srv_sync_prim *const sync_prim)
    return sync_prim->srv_ws->sync_block_fw_addr + sync_prim->offset;
 }
 
+VkResult pvr_srv_sync_get_presignaled_sync(struct pvr_device *device,
+                                           struct pvr_srv_sync **out_sync);
+
 #endif /* PVR_SRV_H */
index 3b0e48a..9419ebe 100644 (file)
@@ -87,6 +87,76 @@ VkResult pvr_srv_init_module(int fd, enum pvr_srvkm_module_type module)
    return VK_SUCCESS;
 }
 
+VkResult pvr_srv_set_timeline_sw_only(int sw_timeline_fd)
+{
+   int ret;
+
+   assert(sw_timeline_fd >= 0);
+
+   ret = drmIoctl(sw_timeline_fd, DRM_IOCTL_SRVKM_SYNC_FORCE_SW_ONLY_CMD, NULL);
+
+   if (unlikely(ret < 0)) {
+      return vk_errorf(
+         NULL,
+         VK_ERROR_OUT_OF_HOST_MEMORY,
+         "DRM_IOCTL_SRVKM_SYNC_FORCE_SW_ONLY_CMD failed, Errno: %s",
+         strerror(errno));
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult pvr_srv_create_sw_fence(int sw_timeline_fd,
+                                 int *new_fence_fd,
+                                 uint64_t *sync_pt_idx)
+{
+   struct drm_srvkm_sw_sync_create_fence_data data = { .name[0] = '\0' };
+   int ret;
+
+   assert(sw_timeline_fd >= 0);
+   assert(new_fence_fd != NULL);
+
+   ret =
+      drmIoctl(sw_timeline_fd, DRM_IOCTL_SRVKM_SW_SYNC_CREATE_FENCE_CMD, &data);
+
+   if (unlikely(ret < 0)) {
+      return vk_errorf(
+         NULL,
+         VK_ERROR_OUT_OF_HOST_MEMORY,
+         "DRM_IOCTL_SRVKM_SW_SYNC_CREATE_FENCE_CMD failed, Errno: %s",
+         strerror(errno));
+   }
+
+   *new_fence_fd = data.fence;
+   if (sync_pt_idx)
+      *sync_pt_idx = data.sync_pt_idx;
+
+   return VK_SUCCESS;
+}
+
+VkResult pvr_srv_sw_sync_timeline_increment(int sw_timeline_fd,
+                                            uint64_t *sync_pt_idx)
+{
+   struct drm_srvkm_sw_timeline_advance_data data = { 0 };
+   int ret;
+
+   assert(sw_timeline_fd >= 0);
+
+   ret = drmIoctl(sw_timeline_fd, DRM_IOCTL_SRVKM_SW_SYNC_INC_CMD, &data);
+
+   if (unlikely(ret < 0)) {
+      return vk_errorf(NULL,
+                       VK_ERROR_OUT_OF_HOST_MEMORY,
+                       "DRM_IOCTL_SRVKM_SW_SYNC_INC_CMD failed, Errno: %s",
+                       strerror(errno));
+   }
+
+   if (sync_pt_idx)
+      *sync_pt_idx = data.sync_pt_idx;
+
+   return VK_SUCCESS;
+}
+
 VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out)
 {
    struct pvr_srv_bridge_connect_cmd cmd = {
index 78c3606..1254fb7 100644 (file)
  * These defines must be prefixed with "DRM_".
  */
 #define DRM_SRVKM_CMD 0U /* PVR Services command. */
+
+/* PVR Sync commands */
+#define DRM_SRVKM_SYNC_FORCE_SW_ONLY_CMD 2U
+
+/* PVR Software Sync commands */
+#define DRM_SRVKM_SW_SYNC_CREATE_FENCE_CMD 3U
+#define DRM_SRVKM_SW_SYNC_INC_CMD 4U
+
+/* PVR Services Render Device Init command */
 #define DRM_SRVKM_INIT 5U /* PVR Services Render Device Init command. */
 
 /* These defines must be prefixed with "DRM_IOCTL_". */
 #define DRM_IOCTL_SRVKM_CMD \
    DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_CMD, struct drm_srvkm_cmd)
+#define DRM_IOCTL_SRVKM_SYNC_FORCE_SW_ONLY_CMD \
+   DRM_IO(DRM_COMMAND_BASE + DRM_SRVKM_SYNC_FORCE_SW_ONLY_CMD)
+#define DRM_IOCTL_SRVKM_SW_SYNC_CREATE_FENCE_CMD                   \
+   DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_SW_SYNC_CREATE_FENCE_CMD, \
+            struct drm_srvkm_sw_sync_create_fence_data)
+#define DRM_IOCTL_SRVKM_SW_SYNC_INC_CMD                  \
+   DRM_IOR(DRM_COMMAND_BASE + DRM_SRVKM_SW_SYNC_INC_CMD, \
+           struct drm_srvkm_sw_timeline_advance_data)
 #define DRM_IOCTL_SRVKM_INIT \
    DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_INIT, struct drm_srvkm_init_data)
 
@@ -815,6 +832,17 @@ struct drm_srvkm_init_data {
    uint32_t init_module;
 };
 
+struct drm_srvkm_sw_sync_create_fence_data {
+   char name[32];
+   __s32 fence;
+   __u32 pad;
+   __u64 sync_pt_idx;
+};
+
+struct drm_srvkm_sw_timeline_advance_data {
+   __u64 sync_pt_idx;
+};
+
 /******************************************************************************
    DRM helper enum
  ******************************************************************************/
@@ -830,6 +858,15 @@ enum pvr_srvkm_module_type {
 
 VkResult pvr_srv_init_module(int fd, enum pvr_srvkm_module_type module);
 
+VkResult pvr_srv_set_timeline_sw_only(int sw_timeline_fd);
+
+VkResult pvr_srv_create_sw_fence(int sw_timeline_fd,
+                                 int *new_fence_fd,
+                                 uint64_t *sync_pt_idx);
+
+VkResult pvr_srv_sw_sync_timeline_increment(int sw_timeline_fd,
+                                            uint64_t *sync_pt_idx);
+
 /******************************************************************************
    Bridge function prototypes
  ******************************************************************************/
index 5208cd8..e90fc8c 100644 (file)
  * SOFTWARE.
  */
 
+#include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <string.h>
 #include <unistd.h>
 #include <vulkan/vulkan.h>
 
@@ -38,7 +40,6 @@
 #include "pvr_srv_job_compute.h"
 #include "pvr_srv_sync.h"
 #include "pvr_winsys.h"
-#include "util/libsync.h"
 #include "util/macros.h"
 #include "vk_alloc.h"
 #include "vk_log.h"
@@ -242,34 +243,16 @@ VkResult pvr_srv_winsys_compute_submit(
 
    pvr_srv_compute_cmd_init(submit_info, &compute_cmd, dev_info);
 
-   for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
-      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);
-      int ret;
-
-      if (!submit_info->waits[i] || srv_wait_sync->fd < 0)
-         continue;
-
-      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_COMPUTE_BIT) {
-         ret = sync_accumulate("", &in_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fd;
-         }
-
-         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_COMPUTE_BIT;
-      }
-   }
-
-   if (submit_info->barrier) {
-      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->barrier);
+   if (submit_info->wait) {
+      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->wait);
 
       if (srv_wait_sync->fd >= 0) {
-         int ret;
-
-         ret = sync_accumulate("", &in_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fd;
+         in_fd = dup(srv_wait_sync->fd);
+         if (in_fd == -1) {
+            return vk_errorf(NULL,
+                             VK_ERROR_OUT_OF_HOST_MEMORY,
+                             "dup called on wait sync failed, Errno: %s",
+                             strerror(errno));
          }
       }
    }
index d6d040d..cbd0797 100644 (file)
 #include "vk_sync.h"
 
 VkResult pvr_srv_winsys_null_job_submit(struct pvr_winsys *ws,
-                                        struct vk_sync **waits,
+                                        struct vk_sync_wait *waits,
                                         uint32_t wait_count,
-                                        struct vk_sync *signal_sync)
+                                        struct vk_sync_signal *signal)
 {
-   struct pvr_srv_sync *srv_signal_sync = to_srv_sync(signal_sync);
    int fd = -1;
 
-   assert(signal_sync);
+   /* Services doesn't support timeline syncs.
+    * Timeline syncs should be emulated by the Vulkan runtime and converted
+    * to binary syncs before this point.
+    */
+   assert((signal->signal_value == 0) &&
+          !(signal->sync->flags & VK_SYNC_IS_TIMELINE));
 
-   for (uint32_t i = 0; i < wait_count; i++) {
-      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(waits[i]);
+   for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) {
+      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(waits[wait_idx].sync);
       int ret;
 
-      if (!waits[i] || srv_wait_sync->fd < 0)
+      if (srv_wait_sync->fd < 0)
          continue;
 
+      assert((waits[wait_idx].wait_value == 0) &&
+             !(waits[wait_idx].sync->flags & VK_SYNC_IS_TIMELINE));
+
       ret = sync_accumulate("", &fd, srv_wait_sync->fd);
       if (ret) {
          if (fd >= 0)
@@ -60,7 +67,7 @@ VkResult pvr_srv_winsys_null_job_submit(struct pvr_winsys *ws,
       }
    }
 
-   pvr_srv_set_sync_payload(srv_signal_sync, fd);
+   pvr_srv_set_sync_payload(to_srv_sync(signal->sync), fd);
 
    return VK_SUCCESS;
 }
index 294de7d..2f0c41e 100644 (file)
 #include <vulkan/vulkan.h>
 
 struct pvr_winsys;
-struct vk_sync;
+struct vk_sync_wait;
+struct vk_sync_signal;
 
 VkResult pvr_srv_winsys_null_job_submit(struct pvr_winsys *ws,
-                                        struct vk_sync **waits,
+                                        struct vk_sync_wait *waits,
                                         uint32_t wait_count,
-                                        struct vk_sync *signal_sync);
+                                        struct vk_sync_signal *signal_sync);
 
 #endif /* PVR_SRV_JOB_NULL_H */
index 9191da1..95b1e72 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include <assert.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <stdbool.h>
@@ -42,7 +43,6 @@
 #include "pvr_srv_sync.h"
 #include "pvr_types.h"
 #include "pvr_winsys.h"
-#include "util/libsync.h"
 #include "util/log.h"
 #include "util/macros.h"
 #include "vk_alloc.h"
@@ -700,60 +700,32 @@ VkResult pvr_srv_winsys_render_submit(
    pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
    pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
 
-   for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
-      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);
-      int ret;
-
-      if (!submit_info->waits[i] || srv_wait_sync->fd < 0)
-         continue;
-
-      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_GEOM_BIT) {
-         ret = sync_accumulate("", &in_geom_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fds;
-         }
-
-         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_GEOM_BIT;
-      }
-
-      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_FRAG_BIT) {
-         ret = sync_accumulate("", &in_frag_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fds;
-         }
-
-         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_FRAG_BIT;
-      }
-   }
-
-   if (submit_info->barrier_geom) {
+   if (submit_info->geometry.wait) {
       struct pvr_srv_sync *srv_wait_sync =
-         to_srv_sync(submit_info->barrier_geom);
+         to_srv_sync(submit_info->geometry.wait);
 
       if (srv_wait_sync->fd >= 0) {
-         int ret;
-
-         ret = sync_accumulate("", &in_geom_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fds;
+         in_geom_fd = dup(srv_wait_sync->fd);
+         if (in_geom_fd == -1) {
+            return vk_errorf(NULL,
+                             VK_ERROR_OUT_OF_HOST_MEMORY,
+                             "dup called on wait sync failed, Errno: %s",
+                             strerror(errno));
          }
       }
    }
 
-   if (submit_info->barrier_frag) {
+   if (submit_info->fragment.wait) {
       struct pvr_srv_sync *srv_wait_sync =
-         to_srv_sync(submit_info->barrier_frag);
+         to_srv_sync(submit_info->fragment.wait);
 
       if (srv_wait_sync->fd >= 0) {
-         int ret;
-
-         ret = sync_accumulate("", &in_frag_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fds;
+         in_frag_fd = dup(srv_wait_sync->fd);
+         if (in_frag_fd == -1) {
+            return vk_errorf(NULL,
+                             VK_ERROR_OUT_OF_HOST_MEMORY,
+                             "dup called on wait sync failed, Errno: %s",
+                             strerror(errno));
          }
       }
    }
index b1e80a0..3e925f3 100644 (file)
  * SOFTWARE.
  */
 
+#include <errno.h>
 #include <fcntl.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <string.h>
 #include <unistd.h>
 #include <vulkan/vulkan.h>
 
@@ -37,7 +39,6 @@
 #include "pvr_srv_job_transfer.h"
 #include "pvr_srv_sync.h"
 #include "pvr_winsys.h"
-#include "util/libsync.h"
 #include "util/macros.h"
 #include "vk_alloc.h"
 #include "vk_log.h"
@@ -271,34 +272,16 @@ VkResult pvr_srv_winsys_transfer_submit(
       cmds_ptr_arr[i] = &transfer_cmds[i];
    }
 
-   for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
-      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);
-      int ret;
-
-      if (!submit_info->waits[i] || srv_wait_sync->fd < 0)
-         continue;
-
-      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) {
-         ret = sync_accumulate("", &in_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fd;
-         }
-
-         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT;
-      }
-   }
-
-   if (submit_info->barrier) {
-      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->barrier);
+   if (submit_info->wait) {
+      struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->wait);
 
       if (srv_wait_sync->fd >= 0) {
-         int ret;
-
-         ret = sync_accumulate("", &in_fd, srv_wait_sync->fd);
-         if (ret) {
-            result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto end_close_in_fd;
+         in_fd = dup(srv_wait_sync->fd);
+         if (in_fd == -1) {
+            return vk_errorf(NULL,
+                             VK_ERROR_OUT_OF_HOST_MEMORY,
+                             "dup called on wait sync failed, Errno: %s",
+                             strerror(errno));
          }
       }
    }
index b33c0eb..c012d91 100644 (file)
@@ -287,13 +287,20 @@ static VkResult pvr_srv_sync_export_sync_file(struct vk_device *device,
                                               int *sync_file)
 {
    struct pvr_srv_sync *srv_sync = to_srv_sync(sync);
+   VkResult result;
    int fd;
 
    if (srv_sync->fd < 0) {
-      *sync_file = -1;
-      return VK_SUCCESS;
+      struct pvr_device *driver_device =
+         container_of(device, struct pvr_device, vk);
+
+      result = pvr_srv_sync_get_presignaled_sync(driver_device, &srv_sync);
+      if (result != VK_SUCCESS)
+         return result;
    }
 
+   assert(srv_sync->fd >= 0);
+
    fd = dup(srv_sync->fd);
    if (fd < 0)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);