From bc65dcab3bc48673ff6180afb036561a4b8b1119 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 10 Nov 2017 10:58:10 +0100 Subject: [PATCH] radeonsi: avoid syncing the driver thread in si_fence_finish MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit It is really only required when we need to flush for deferred fences. Reviewed-by: Marek Olšák --- src/gallium/auxiliary/util/u_threaded_context.h | 8 +++ src/gallium/drivers/radeonsi/si_fence.c | 75 +++++++++++++------------ src/gallium/drivers/radeonsi/si_hw_context.c | 3 + 3 files changed, 49 insertions(+), 37 deletions(-) diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index b2d9045..3408956 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -407,6 +407,14 @@ threaded_transfer(struct pipe_transfer *transfer) return (struct threaded_transfer*)transfer; } +static inline struct pipe_context * +threaded_context_unwrap_unsync(struct pipe_context *pipe) +{ + if (!pipe || !pipe->priv) + return pipe; + return (struct pipe_context*)pipe->priv; +} + static inline void tc_unflushed_batch_token_reference(struct tc_unflushed_batch_token **dst, struct tc_unflushed_batch_token *src) diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index ff1800c..5163d65 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -188,12 +188,8 @@ static boolean si_fence_finish(struct pipe_screen *screen, { struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; struct si_multi_fence *rfence = (struct si_multi_fence *)fence; - struct r600_common_context *rctx; int64_t abs_timeout = os_time_get_absolute_timeout(timeout); - ctx = threaded_context_unwrap_sync(ctx); - rctx = ctx ? (struct r600_common_context*)ctx : NULL; - if (!util_queue_fence_is_signalled(&rfence->ready)) { if (!timeout) return false; @@ -245,41 +241,46 @@ static boolean si_fence_finish(struct pipe_screen *screen, } /* Flush the gfx IB if it hasn't been flushed yet. */ - if (rctx && - rfence->gfx_unflushed.ctx == rctx && - rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { - /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile) - * spec says: - * - * "If the sync object being blocked upon will not be - * signaled in finite time (for example, by an associated - * fence command issued previously, but not yet flushed to - * the graphics pipeline), then ClientWaitSync may hang - * forever. To help prevent this behavior, if - * ClientWaitSync is called and all of the following are - * true: - * - * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags, - * * sync is unsignaled when ClientWaitSync is called, - * * and the calls to ClientWaitSync and FenceSync were - * issued from the same context, - * - * then the GL will behave as if the equivalent of Flush - * were inserted immediately after the creation of sync." - * - * This means we need to flush for such fences even when we're - * not going to wait. - */ - rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); - rfence->gfx_unflushed.ctx = NULL; + if (ctx && rfence->gfx_unflushed.ctx) { + struct si_context *sctx; + + sctx = (struct si_context *)threaded_context_unwrap_unsync(ctx); + if (rfence->gfx_unflushed.ctx == &sctx->b && + rfence->gfx_unflushed.ib_index == sctx->b.num_gfx_cs_flushes) { + /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile) + * spec says: + * + * "If the sync object being blocked upon will not be + * signaled in finite time (for example, by an associated + * fence command issued previously, but not yet flushed to + * the graphics pipeline), then ClientWaitSync may hang + * forever. To help prevent this behavior, if + * ClientWaitSync is called and all of the following are + * true: + * + * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags, + * * sync is unsignaled when ClientWaitSync is called, + * * and the calls to ClientWaitSync and FenceSync were + * issued from the same context, + * + * then the GL will behave as if the equivalent of Flush + * were inserted immediately after the creation of sync." + * + * This means we need to flush for such fences even when we're + * not going to wait. + */ + threaded_context_unwrap_sync(ctx); + sctx->b.gfx.flush(&sctx->b, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); + rfence->gfx_unflushed.ctx = NULL; - if (!timeout) - return false; + if (!timeout) + return false; - /* Recompute the timeout after all that. */ - if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { - int64_t time = os_time_get_nano(); - timeout = abs_timeout > time ? abs_timeout - time : 0; + /* Recompute the timeout after all that. */ + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { + int64_t time = os_time_get_nano(); + timeout = abs_timeout > time ? abs_timeout - time : 0; + } } } diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 15234d7..1903cf8 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -129,6 +129,9 @@ void si_context_gfx_flush(void *context, unsigned flags, ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence); if (fence) ws->fence_reference(fence, ctx->b.last_gfx_fence); + + /* This must be after cs_flush returns, since the context's API + * thread can concurrently read this value in si_fence_finish. */ ctx->b.num_gfx_cs_flushes++; /* Check VM faults if needed. */ -- 2.7.4