radeonsi: atomize L2 prefetches
author: Marek Olšák <marek.olsak@amd.com>
Tue, 24 Jan 2017 22:28:32 +0000 (23:28 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 30 Jan 2017 12:27:14 +0000 (13:27 +0100)
Turn the L2 prefetches into a state atom so that the big conditional
statement can be moved out of si_draw_vbo.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_cp_dma.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c

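diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 582e599..b398256 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c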
@@ -406,7 +406,46 @@ void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf
        si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL);
 }
 
+static void cik_prefetch_shader_async(struct si_context *sctx,
+                                     struct si_pm4_state *state)
+{
+       if (state) {
+               struct pipe_resource *bo = &state->bo[0]->b.b;
+               assert(state->nbo == 1);
+
+               cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
+       }
+}
+
+static void cik_emit_prefetch_L2(struct si_context *sctx, struct r600_atom *atom)
+{
+       /* Prefetch shaders and VBO descriptors to TC L2. */
+       if (si_pm4_state_changed(sctx, ls))
+               cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
+       if (si_pm4_state_changed(sctx, hs))
+               cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
+       if (si_pm4_state_changed(sctx, es))
+               cik_prefetch_shader_async(sctx, sctx->queued.named.es);
+       if (si_pm4_state_changed(sctx, gs))
+               cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+       if (si_pm4_state_changed(sctx, vs))
+               cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+
+       /* Vertex buffer descriptors are uploaded uncached, so prefetch
+        * them right after the VS binary. */
+       if (sctx->vertex_buffer_pointer_dirty) {
+               cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
+                                        sctx->vertex_buffers.buffer_offset,
+                                        sctx->vertex_elements->count * 16);
+       }
+       if (si_pm4_state_changed(sctx, ps))
+               cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
+}
+
 void si_init_cp_dma_functions(struct si_context *sctx)
 {
        sctx->b.clear_buffer = si_clear_buffer;
+
+       si_init_atom(sctx, &sctx->prefetch_L2, &sctx->atoms.s.prefetch_L2,
+                    cik_emit_prefetch_L2);
 }
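diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 4a9fcd0..4c1120a 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c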
@@ -1038,6 +1038,8 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
         * uploaded to a fresh new buffer, so I don't think flushing the const
         * cache is needed. */
        si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+       if (sctx->b.chip_class >= CIK)
+               si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
        sctx->vertex_buffers_dirty = false;
        sctx->vertex_buffer_pointer_dirty = true;
        return true;
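diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 57eaac9..d862e26 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c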
@@ -207,6 +207,9 @@ void si_begin_new_cs(struct si_context *ctx)
        if (ctx->ce_preamble_ib)
                si_ce_reinitialize_all_descriptors(ctx);
 
+       if (ctx->b.chip_class >= CIK)
+               si_mark_atom_dirty(ctx, &ctx->prefetch_L2);
+
        ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
        ctx->framebuffer.dirty_zsbuf = true;
        si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
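diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6558474..b6474e6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h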
@@ -232,6 +232,7 @@ struct si_context {
        union si_state                  emitted;
 
        /* Atom declarations. */
+       struct r600_atom                prefetch_L2;
        struct si_framebuffer           framebuffer;
        struct si_sample_locs           msaa_sample_locs;
        struct r600_atom                db_render_state;
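diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 03e5011..915a8eb 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h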
@@ -132,6 +132,7 @@ union si_state {
 union si_state_atoms {
        struct {
                /* The order matters. */
+               struct r600_atom *prefetch_L2;
                struct r600_atom *render_cond;
                struct r600_atom *streamout_begin;
                struct r600_atom *streamout_enable; /* must be after streamout_begin */
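diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index db671c9..0374841 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c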
@@ -940,17 +940,6 @@ void si_ce_post_draw_synchronization(struct si_context *sctx)
        }
 }
 
-static void cik_prefetch_shader_async(struct si_context *sctx,
-                                     struct si_pm4_state *state)
-{
-       if (state) {
-               struct pipe_resource *bo = &state->bo[0]->b.b;
-               assert(state->nbo == 1);
-
-               cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
-       }
-}
-
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
        struct si_context *sctx = (struct si_context *)ctx;
@@ -1129,34 +1118,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        if (!si_upload_vertex_buffer_descriptors(sctx))
                return;
 
-       /* Flushed caches prior to prefetching shaders. */
+       /* Flush caches before the first state atom, which does L2 prefetches. */
        if (sctx->b.flags)
                si_emit_cache_flush(sctx);
 
-       /* Prefetch shaders and VBO descriptors to TC L2. */
-       if (sctx->b.chip_class >= CIK) {
-               if (si_pm4_state_changed(sctx, ls))
-                       cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
-               if (si_pm4_state_changed(sctx, hs))
-                       cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
-               if (si_pm4_state_changed(sctx, es))
-                       cik_prefetch_shader_async(sctx, sctx->queued.named.es);
-               if (si_pm4_state_changed(sctx, gs))
-                       cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
-               if (si_pm4_state_changed(sctx, vs))
-                       cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
-
-               /* Vertex buffer descriptors are uploaded uncached, so prefetch
-                * them right after the VS binary. */
-               if (sctx->vertex_buffer_pointer_dirty) {
-                       cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
-                                               sctx->vertex_buffers.buffer_offset,
-                                               sctx->vertex_elements->count * 16);
-               }
-               if (si_pm4_state_changed(sctx, ps))
-                       cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
-       }
-
        /* Emit states. */
        mask = sctx->dirty_atoms;
        while (mask) {
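diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b3616dc..02f8d6c 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c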
@@ -2525,6 +2525,9 @@ bool si_update_shaders(struct si_context *sctx)
                        return false;
        }
 
+       if (sctx->b.chip_class >= CIK)
+               si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
+
        sctx->do_update_shaders = false;
        return true;
 }