radeonsi/gfx10: make sure GDS is idle between IBs
author: Marek Olšák <marek.olsak@amd.com>
Wed, 5 Jun 2019 02:02:25 +0000 (22:02 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 3 Jul 2019 19:51:13 +0000 (15:51 -0400)
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_state_streamout.c

index 4a4a7ee..13ef470 100644 (file)
@@ -75,22 +75,21 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 {
        struct radeon_cmdbuf *cs = ctx->gfx_cs;
        struct radeon_winsys *ws = ctx->ws;
+       const unsigned wait_ps_cs = SI_CONTEXT_PS_PARTIAL_FLUSH |
+                                   SI_CONTEXT_CS_PARTIAL_FLUSH;
        unsigned wait_flags = 0;
 
        if (ctx->gfx_flush_in_progress)
                return;
 
        if (!ctx->screen->info.kernel_flushes_tc_l2_after_ib) {
-               wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-                             SI_CONTEXT_CS_PARTIAL_FLUSH |
+               wait_flags |= wait_ps_cs |
                              SI_CONTEXT_INV_L2;
        } else if (ctx->chip_class == GFX6) {
                /* The kernel flushes L2 before shaders are finished. */
-               wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-                             SI_CONTEXT_CS_PARTIAL_FLUSH;
+               wait_flags |= wait_ps_cs;
        } else if (!(flags & RADEON_FLUSH_START_NEXT_GFX_IB_NOW)) {
-               wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-                             SI_CONTEXT_CS_PARTIAL_FLUSH;
+               wait_flags |= wait_ps_cs;
        }
 
        /* Drop this flush if it's a no-op. */
@@ -162,6 +161,13 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
                if (ctx->streamout.begin_emitted) {
                        si_emit_streamout_end(ctx);
                        ctx->streamout.suspended = true;
+
+                       /* Since streamout uses GDS on gfx10, we need to make
+                        * GDS idle when we leave the IB, otherwise another
+                        * process might overwrite it while our shaders are busy.
+                        */
+                       if (ctx->chip_class >= GFX10)
+                               wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
                }
        }
 
@@ -175,7 +181,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
                ctx->flags |= wait_flags;
                ctx->emit_cache_flush(ctx);
        }
-       ctx->gfx_last_ib_is_busy = wait_flags == 0;
+       ctx->gfx_last_ib_is_busy = (wait_flags & wait_ps_cs) != wait_ps_cs;
 
        if (ctx->current_saved_cs) {
                si_trace_emit(ctx);
index 6b72749..1eb06b7 100644 (file)
@@ -96,6 +96,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
        struct si_context *sctx = (struct si_context *)ctx;
        unsigned old_num_targets = sctx->streamout.num_targets;
        unsigned i;
+       bool wait_now = false;
 
        /* We are going to unbind the buffers. Mark which caches need to be flushed. */
        if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
@@ -126,10 +127,19 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
                               SI_CONTEXT_INV_VCACHE;
 
                /* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */
-               if (sctx->chip_class >= GFX10)
+               if (sctx->chip_class >= GFX10) {
                        sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
-               else
+
+                       /* Wait now. This is needed to make sure that GDS is not
+                        * busy at the end of IBs.
+                        *
+                        * Also, the next streamout operation will overwrite GDS,
+                        * so we need to make sure that it's idle.
+                        */
+                       wait_now = true;
+               } else {
                        sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
+               }
        }
 
        /* All readers of the streamout targets need to be finished before we can
@@ -200,6 +210,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
        }
        for (; i < old_num_targets; i++)
                si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
+
+       if (wait_now)
+               sctx->emit_cache_flush(sctx);
 }
 
 static void gfx10_emit_streamout_begin(struct si_context *sctx)