radeonsi/compute: flush caches with si_emit_cache_flush
author: Marek Olšák <marek.olsak@amd.com>
Sat, 20 Sep 2014 09:48:58 +0000 (11:48 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 24 Sep 2014 12:48:02 +0000 (14:48 +0200)
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_state_draw.c

index beaa312..9a5a100 100644 (file)
 #define R600_CONTEXT_FLUSH_AND_INV_DB_META     (1 << 11)
 #define R600_CONTEXT_FLUSH_AND_INV_DB          (1 << 12)
 #define R600_CONTEXT_FLUSH_AND_INV_CB          (1 << 13)
+#define R600_CONTEXT_FLUSH_WITH_INV_L2         (1 << 14)
 /* engine synchronization */
 #define R600_CONTEXT_PS_PARTIAL_FLUSH          (1 << 16)
 #define R600_CONTEXT_WAIT_3D_IDLE              (1 << 17)
 #define R600_CONTEXT_WAIT_CP_DMA_IDLE          (1 << 18)
 #define R600_CONTEXT_VGT_FLUSH                 (1 << 19)
 #define R600_CONTEXT_VGT_STREAMOUT_SYNC                (1 << 20)
+/* other flags */
+#define R600_CONTEXT_FLAG_COMPUTE              (1u << 31)
 
 /* special primitive types */
 #define R600_PRIM_RECTANGLE_LIST       PIPE_PRIM_MAX
index 3ad9182..e24c6e2 100644 (file)
@@ -189,17 +189,14 @@ static void si_launch_grid(
        radeon_emit(cs, 0x80000000);
        radeon_emit(cs, 0x80000000);
 
-       pm4->compute_pkt = true;
-
-       si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
-       si_pm4_cmd_add(pm4, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) |
-                           EVENT_INDEX(0x7) |
-                           EVENT_WRITE_INV_L2);
-       si_pm4_cmd_end(pm4, false);
+       sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+                        R600_CONTEXT_INV_SHADER_CACHE |
+                        R600_CONTEXT_INV_CONST_CACHE |
+                        R600_CONTEXT_FLUSH_WITH_INV_L2 |
+                        R600_CONTEXT_FLAG_COMPUTE;
+       si_emit_cache_flush(&sctx->b, NULL);
 
-       si_pm4_inval_texture_cache(pm4);
-       si_pm4_inval_shader_cache(pm4);
-       si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
+       pm4->compute_pkt = true;
 
        /* Upload the kernel arguments */
 
@@ -368,10 +365,6 @@ static void si_launch_grid(
        si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(0x4)));
        si_pm4_cmd_end(pm4, false);
 
-       si_pm4_inval_texture_cache(pm4);
-       si_pm4_inval_shader_cache(pm4);
-       si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
-
        si_pm4_emit(sctx, pm4);
 
 #if 0
@@ -382,6 +375,12 @@ static void si_launch_grid(
 #endif
 
        si_pm4_free_state(sctx, pm4, ~0);
+
+       sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+                        R600_CONTEXT_INV_SHADER_CACHE |
+                        R600_CONTEXT_INV_CONST_CACHE |
+                        R600_CONTEXT_FLAG_COMPUTE;
+       si_emit_cache_flush(&sctx->b, NULL);
 }
 
 
index 61951ee..a4b7017 100644 (file)
@@ -788,6 +788,8 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 {
        struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
        uint32_t cp_coher_cntl = 0;
+       uint32_t compute =
+               PKT3_SHADER_TYPE_S(!!(sctx->flags & R600_CONTEXT_FLAG_COMPUTE));
 
        /* XXX SI flushes both ICACHE and KCACHE if either flag is set.
         * XXX CIK shouldn't have this issue. Test CIK before separating the flags
@@ -821,7 +823,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 
        if (cp_coher_cntl) {
                if (sctx->chip_class >= CIK) {
-                       radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
+                       radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute);
                        radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
                        radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
                        radeon_emit(cs, 0xff);            /* CP_COHER_SIZE_HI */
@@ -829,7 +831,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
                        radeon_emit(cs, 0);               /* CP_COHER_BASE_HI */
                        radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
                } else {
-                       radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+                       radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
                        radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
                        radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
                        radeon_emit(cs, 0);               /* CP_COHER_BASE */
@@ -838,37 +840,42 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
        }
 
        if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META) {
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
                radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
        }
        if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META) {
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
                radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
        }
+       if (sctx->flags & R600_CONTEXT_FLUSH_WITH_INV_L2) {
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
+               radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) | EVENT_INDEX(7) |
+                               EVENT_WRITE_INV_L2);
+        }
 
        if (sctx->flags & (R600_CONTEXT_WAIT_3D_IDLE |
                           R600_CONTEXT_PS_PARTIAL_FLUSH)) {
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
                radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
        } else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
                /* Needed if streamout buffers are going to be used as a source. */
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
                radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
        }
 
        if (sctx->flags & R600_CONTEXT_VGT_FLUSH) {
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
                radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
        }
        if (sctx->flags & R600_CONTEXT_VGT_STREAMOUT_SYNC) {
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
                radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
        }
 
        sctx->flags = 0;
 }
 
-const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 17 }; /* number of CS dwords */
+const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 19 }; /* number of CS dwords */
 
 static void si_get_draw_start_count(struct si_context *sctx,
                                    const struct pipe_draw_info *info,