}
if (barrier_mask & bit) {
- job->serialize = true;
+ job->serialize = *src_mask;
*src_mask = 0;
cmd_buffer->state.barrier.dst_mask &= ~bit;
}
return;
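
For context: the hunk above assumes that, earlier in the function, the consumption point (bit) and the matching per-consumer source mask (src_mask) were picked from the job's type, so each job inherits only the sources recorded for its own queue. A minimal sketch of that selection, assuming hypothetical helpers job_is_compute()/job_is_transfer() (the src_mask_* fields are the real ones used further below):

   uint8_t bit;
   uint8_t *src_mask;
   if (job_is_compute(job)) {
      /* CSD jobs consume the compute barrier bit. */
      bit = V3DV_BARRIER_COMPUTE_BIT;
      src_mask = &cmd_buffer->state.barrier.src_mask_compute;
   } else if (job_is_transfer(job)) {
      /* Transfer jobs consume the transfer barrier bit. */
      bit = V3DV_BARRIER_TRANSFER_BIT;
      src_mask = &cmd_buffer->state.barrier.src_mask_transfer;
   } else {
      /* Any other GPU job consumes the graphics barrier bit. */
      bit = V3DV_BARRIER_GRAPHICS_BIT;
      src_mask = &cmd_buffer->state.barrier.src_mask_graphics;
   }
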
if (pending_barrier.dst_mask) {
- job->serialize = true;
+ /* FIXME: do the same as we do for primaries and only choose the
+ * relevant src masks.
+ */
+ job->serialize = pending_barrier.src_mask_graphics |
+ pending_barrier.src_mask_transfer |
+ pending_barrier.src_mask_compute;
if (pending_barrier.bcl_buffer_access ||
pending_barrier.bcl_image_access) {
job->needs_bcl_sync = true;
*/
bool always_flush;
- /* Whether we need to serialize this job in our command stream */
- bool serialize;
+ /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
+ * can use this to select the hw queues where we need to serialize the job.
+ */
+ uint8_t serialize;
/* If this is a CL job, whether we should sync before binning */
bool needs_bcl_sync;
V3DV_BARRIER_COMPUTE_BIT = (1 << 1),
V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
};
+#define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
+ V3DV_BARRIER_TRANSFER_BIT | \
+ V3DV_BARRIER_COMPUTE_BIT)
struct v3dv_barrier_state {
/* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
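
To make the mask semantics concrete, here is a hedged sketch of how a barrier's Vulkan source stage mask could be folded into these bits; the helper name and the exact stage classification are illustrative assumptions, not the driver's actual code (requires <vulkan/vulkan.h> and <stdint.h>):

static uint8_t
stage_mask_to_barrier_mask(VkPipelineStageFlags stages)
{
   uint8_t mask = 0;
   /* Compute work maps to the CSD queue. */
   if (stages & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT))
      mask |= V3DV_BARRIER_COMPUTE_BIT;
   /* Transfers may run as CL or TFU jobs, tracked with their own bit. */
   if (stages & (VK_PIPELINE_STAGE_TRANSFER_BIT |
                 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT))
      mask |= V3DV_BARRIER_TRANSFER_BIT;
   /* Everything else (vertex, fragment, etc.) is graphics work. */
   if (stages & ~(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                  VK_PIPELINE_STAGE_TRANSFER_BIT))
      mask |= V3DV_BARRIER_GRAPHICS_BIT;
   return mask;
}
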
if (queue->last_job_syncs.first[queue_sync])
n_syncs = sync_info->wait_count;
- /* If the serialize flag is set, this job waits for completion of all GPU
- * jobs submitted in any queue V3DV_QUEUE_(CL/TFU/CSD) before running.
+ /* If the serialize mask is non-zero, the job needs to be serialized
+ * against the corresponding queues. Note that we may implement transfer
+ * operations as either CL or TFU jobs.
+ *
+ * FIXME: maybe we could track more precisely whether the source of a
+ * transfer barrier is a CL and/or a TFU job.
*/
- *count = n_syncs + (job->serialize ? 3 : 0);
+ bool sync_csd = job->serialize & V3DV_BARRIER_COMPUTE_BIT;
+ bool sync_tfu = job->serialize & V3DV_BARRIER_TRANSFER_BIT;
+ bool sync_cl = job->serialize & (V3DV_BARRIER_GRAPHICS_BIT |
+ V3DV_BARRIER_TRANSFER_BIT);
+ *count = n_syncs;
+ if (sync_cl)
+ (*count)++;
+ if (sync_tfu)
+ (*count)++;
+ if (sync_csd)
+ (*count)++;
if (!*count)
return NULL;
vk_sync_as_drm_syncobj(sync_info->waits[i].sync)->syncobj;
}
- if (job->serialize) {
- for (int i = 0; i < 3; i++)
- syncs[n_syncs + i].handle = queue->last_job_syncs.syncs[i];
- }
+ if (sync_cl)
+ syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CL];
+
+ if (sync_csd)
+ syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CSD];
+
+ if (sync_tfu)
+ syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_TFU];
+ assert(n_syncs == *count);
return syncs;
}
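
As a worked example of the new sync accounting (a standalone sketch; V3DV_BARRIER_GRAPHICS_BIT = (1 << 0) is assumed from the numbering of the other bits): a job serialized by a transfer barrier must wait on both the CL and TFU queue syncs, since transfers may be implemented as either job type, so it adds two waits instead of the previous unconditional three:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum {
   V3DV_BARRIER_GRAPHICS_BIT = (1 << 0), /* assumed */
   V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
   V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
};

int main(void)
{
   uint8_t serialize = V3DV_BARRIER_TRANSFER_BIT;
   uint32_t n_syncs = 2; /* e.g. two user wait semaphores */

   bool sync_csd = serialize & V3DV_BARRIER_COMPUTE_BIT;
   bool sync_tfu = serialize & V3DV_BARRIER_TRANSFER_BIT;
   bool sync_cl  = serialize & (V3DV_BARRIER_GRAPHICS_BIT |
                                V3DV_BARRIER_TRANSFER_BIT);

   uint32_t count = n_syncs + sync_cl + sync_tfu + sync_csd;
   printf("count = %u\n", (unsigned)count); /* prints "count = 4" */
   return 0;
}
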
* order requirements, which basically require that signal operations occur
* in submission order.
*/
- queue->noop_job->serialize = true;
+ queue->noop_job->serialize = V3DV_BARRIER_ALL;
return VK_SUCCESS;
}
if (!job)
return NULL;
- job->serialize = true;
+ /* FIXME: we can do better than serializing against all queues */
+ job->serialize = V3DV_BARRIER_ALL;
job->needs_bcl_sync = is_bcl_barrier;
return job;
}
v3dv_cmd_buffer_finish_job(primary);
v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
if (pending_barrier.dst_mask) {
- secondary_job->serialize = true;
+ /* FIXME: do the same as we do for primaries and only choose the
+ * relevant src masks.
+ */
+ secondary_job->serialize = pending_barrier.src_mask_graphics |
+ pending_barrier.src_mask_transfer |
+ pending_barrier.src_mask_compute;
if (pending_barrier.bcl_buffer_access ||
pending_barrier.bcl_image_access) {
secondary_job->needs_bcl_sync = true;