}
}
+static void si_improve_sync_flags(struct si_context *sctx, struct pipe_resource *dst,
+ struct pipe_resource *src, unsigned *flags)
+{
+ if (dst->target != PIPE_BUFFER || (src && src->target != PIPE_BUFFER))
+ return;
+
+ const unsigned cs_mask = SI_BIND_CONSTANT_BUFFER(PIPE_SHADER_COMPUTE) |
+ SI_BIND_SHADER_BUFFER(PIPE_SHADER_COMPUTE) |
+ SI_BIND_IMAGE_BUFFER(PIPE_SHADER_COMPUTE) |
+ SI_BIND_SAMPLER_BUFFER(PIPE_SHADER_COMPUTE);
+
+ const unsigned ps_mask = SI_BIND_CONSTANT_BUFFER(PIPE_SHADER_FRAGMENT) |
+ SI_BIND_SHADER_BUFFER(PIPE_SHADER_FRAGMENT) |
+ SI_BIND_IMAGE_BUFFER(PIPE_SHADER_FRAGMENT) |
+ SI_BIND_SAMPLER_BUFFER(PIPE_SHADER_FRAGMENT);
+
+ unsigned bind_history = si_resource(dst)->bind_history |
+ (src ? si_resource(src)->bind_history : 0);
+
+ /* Clear SI_OP_SYNC_CS_BEFORE if the buffer has never been used with a CS. */
+ if (*flags & SI_OP_SYNC_CS_BEFORE && !(bind_history & cs_mask))
+ *flags &= ~SI_OP_SYNC_CS_BEFORE;
+
+ /* Clear SI_OP_SYNC_PS_BEFORE if the buffer has never been used with a PS. */
+ if (*flags & SI_OP_SYNC_PS_BEFORE && !(bind_history & ps_mask)) {
+ *flags &= ~SI_OP_SYNC_PS_BEFORE;
+ *flags |= SI_OP_SYNC_GE_BEFORE;
+ }
+}
+
void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
void *shader, unsigned flags)
{
-
/* Wait for previous shaders to finish. */
+ if (flags & SI_OP_SYNC_GE_BEFORE)
+ sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
+
if (flags & SI_OP_SYNC_PS_BEFORE)
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
if (!size)
return;
+ si_improve_sync_flags(sctx, dst, NULL, &flags);
+
ASSERTED unsigned clear_alignment = MIN2(clear_value_size, 4);
assert(clear_value_size != 3 && clear_value_size != 6); /* 12 is allowed. */
enum si_cache_policy cache_policy = get_cache_policy(sctx, coher, size);
uint64_t compute_min_size = 8 * 1024;
+ si_improve_sync_flags(sctx, dst, src, &flags);
+
/* Only use compute for VRAM copies on dGPUs. */
if (sctx->screen->info.has_dedicated_vram && si_resource(dst)->domains & RADEON_DOMAIN_VRAM &&
si_resource(src)->domains & RADEON_DOMAIN_VRAM && size > compute_min_size &&
assert(size && size % 4 == 0);
+ if (user_flags & SI_OP_SYNC_GE_BEFORE)
+ sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
+
if (user_flags & SI_OP_SYNC_CS_BEFORE)
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
}
}
+ if (user_flags & SI_OP_SYNC_GE_BEFORE)
+ sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
+
if (user_flags & SI_OP_SYNC_CS_BEFORE)
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;