}
if (barrier_mask & bit) {
- job->serialize = true;
+ job->serialize = *src_mask;
*src_mask = 0;
cmd_buffer->state.barrier.dst_mask &= ~bit;
}
return;
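
For context: the hunk above assumes that, earlier in the function, the consumption point (bit) and the matching per-consumer source mask (src_mask) were picked from the job's type, so each job inherits only the sources recorded for its own queue. A minimal sketch of that selection, assuming hypothetical helpers job_is_compute()/job_is_transfer() (the src_mask_* fields are the real ones used further below):

   uint8_t bit;
   uint8_t *src_mask;
   if (job_is_compute(job)) {
      /* CSD jobs consume the compute barrier bit. */
      bit = V3DV_BARRIER_COMPUTE_BIT;
      src_mask = &cmd_buffer->state.barrier.src_mask_compute;
   } else if (job_is_transfer(job)) {
      /* Transfer jobs consume the transfer barrier bit. */
      bit = V3DV_BARRIER_TRANSFER_BIT;
      src_mask = &cmd_buffer->state.barrier.src_mask_transfer;
   } else {
      /* Any other GPU job consumes the graphics barrier bit. */
      bit = V3DV_BARRIER_GRAPHICS_BIT;
      src_mask = &cmd_buffer->state.barrier.src_mask_graphics;
   }
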
if (pending_barrier.dst_mask) {
- job->serialize = true;
+ /* FIXME: do the same as we do for primaries and only choose the
+ * relevant src masks.
+ */
+ job->serialize = pending_barrier.src_mask_graphics |
+ pending_barrier.src_mask_transfer |
+ pending_barrier.src_mask_compute;
if (pending_barrier.bcl_buffer_access ||
pending_barrier.bcl_image_access) {
job->needs_bcl_sync = true;
*/
bool always_flush;
- /* Whether we need to serialize this job in our command stream */
- bool serialize;
+ /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
+ * can use this to select the hw queues where we need to serialize the job.
+ */
+ uint8_t serialize;
/* If this is a CL job, whether we should sync before binning */
bool needs_bcl_sync;
V3DV_BARRIER_COMPUTE_BIT = (1 << 1),
V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
};
+#define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
+ V3DV_BARRIER_TRANSFER_BIT | \
+ V3DV_BARRIER_COMPUTE_BIT)
struct v3dv_barrier_state {
/* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
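
To make the mask semantics concrete, here is a hedged sketch of how a barrier's Vulkan source stage mask could be folded into these bits; the helper name and the exact stage classification are illustrative assumptions, not the driver's actual code (requires <vulkan/vulkan.h> and <stdint.h>):

static uint8_t
stage_mask_to_barrier_mask(VkPipelineStageFlags stages)
{
   uint8_t mask = 0;
   /* Compute work maps to the CSD queue. */
   if (stages & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT))
      mask |= V3DV_BARRIER_COMPUTE_BIT;
   /* Transfers may run as CL or TFU jobs, tracked with their own bit. */
   if (stages & (VK_PIPELINE_STAGE_TRANSFER_BIT |
                 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT))
      mask |= V3DV_BARRIER_TRANSFER_BIT;
   /* Everything else (vertex, fragment, etc.) is graphics work. */
   if (stages & ~(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                  VK_PIPELINE_STAGE_TRANSFER_BIT))
      mask |= V3DV_BARRIER_GRAPHICS_BIT;
   return mask;
}
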
if (queue->last_job_syncs.first[queue_sync])
n_syncs = sync_info->wait_count;
- /* If the serialize flag is set, this job waits for completion of all GPU
- * jobs submitted in any queue V3DV_QUEUE_(CL/TFU/CSD) before running.
+ /* If the serialize mask is non-zero, the job needs to be serialized
+ * against the corresponding queues. Note that we may implement transfer
+ * operations as either CL or TFU jobs.
+ *
+ * FIXME: maybe we could track more precisely whether the source of a
+ * transfer barrier is a CL and/or a TFU job.
*/
- *count = n_syncs + (job->serialize ? 3 : 0);
+ bool sync_csd = job->serialize & V3DV_BARRIER_COMPUTE_BIT;
+ bool sync_tfu = job->serialize & V3DV_BARRIER_TRANSFER_BIT;
+ bool sync_cl = job->serialize & (V3DV_BARRIER_GRAPHICS_BIT |
+ V3DV_BARRIER_TRANSFER_BIT);
+ *count = n_syncs;
+ if (sync_cl)
+ (*count)++;
+ if (sync_tfu)
+ (*count)++;
+ if (sync_csd)
+ (*count)++;
if (!*count)
return NULL;
vk_sync_as_drm_syncobj(sync_info->waits[i].sync)->syncobj;
}
- if (job->serialize) {
- for (int i = 0; i < 3; i++)
- syncs[n_syncs + i].handle = queue->last_job_syncs.syncs[i];
- }
+ if (sync_cl)
+ syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CL];
+
+ if (sync_csd)
+ syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CSD];
+
+ if (sync_tfu)
+ syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_TFU];
+ assert(n_syncs == *count);
return syncs;
}
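
As a worked example of the new sync accounting (a standalone sketch; V3DV_BARRIER_GRAPHICS_BIT = (1 << 0) is assumed from the numbering of the other bits): a job serialized by a transfer barrier must wait on both the CL and TFU queue syncs, since transfers may be implemented as either job type, so it adds two waits instead of the previous unconditional three:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum {
   V3DV_BARRIER_GRAPHICS_BIT = (1 << 0), /* assumed */
   V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
   V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
};

int main(void)
{
   uint8_t serialize = V3DV_BARRIER_TRANSFER_BIT;
   uint32_t n_syncs = 2; /* e.g. two user wait semaphores */

   bool sync_csd = serialize & V3DV_BARRIER_COMPUTE_BIT;
   bool sync_tfu = serialize & V3DV_BARRIER_TRANSFER_BIT;
   bool sync_cl  = serialize & (V3DV_BARRIER_GRAPHICS_BIT |
                                V3DV_BARRIER_TRANSFER_BIT);

   uint32_t count = n_syncs + sync_cl + sync_tfu + sync_csd;
   printf("count = %u\n", (unsigned)count); /* prints "count = 4" */
   return 0;
}
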
* order requirements, which basically require that signal operations occur
* in submission order.
*/
- queue->noop_job->serialize = true;
+ queue->noop_job->serialize = V3DV_BARRIER_ALL;
return VK_SUCCESS;
}
if (!job)
return NULL;
- job->serialize = true;
+ /* FIXME: we can do better than serializing against all queues */
+ job->serialize = V3DV_BARRIER_ALL;
job->needs_bcl_sync = is_bcl_barrier;
return job;
}
v3dv_cmd_buffer_finish_job(primary);
v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
if (pending_barrier.dst_mask) {
- secondary_job->serialize = true;
+ /* FIXME: do the same as we do for primaries and only choose the
+ * relevant src masks.
+ */
+ secondary_job->serialize = pending_barrier.src_mask_graphics |
+ pending_barrier.src_mask_transfer |
+ pending_barrier.src_mask_compute;
if (pending_barrier.bcl_buffer_access ||
pending_barrier.bcl_image_access) {
secondary_job->needs_bcl_sync = true;