radeonsi: make si_gfx_write_event_eop more configurable
author: Marek Olšák <marek.olsak@amd.com>
Fri, 21 Sep 2018 07:36:32 +0000 (03:36 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 16 Oct 2018 19:28:22 +0000 (15:28 -0400)
src/amd/common/sid.h
src/gallium/drivers/radeonsi/si_fence.c
src/gallium/drivers/radeonsi/si_perfcounter.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_query.c
src/gallium/drivers/radeonsi/si_state_draw.c

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index b3321ea..3e36eb2 100644 (file)
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
+#define         EOP_DST_SEL(x)                         ((x) << 16)
+#define                        EOP_DST_SEL_MEM                 0
+#define                        EOP_DST_SEL_TC_L2               1
 #define         EOP_INT_SEL(x)                          ((x) << 24)
 #define                        EOP_INT_SEL_NONE                        0
 #define                        EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM  3
 #define                        EOP_DATA_SEL_VALUE_32BIT        1
 #define                        EOP_DATA_SEL_VALUE_64BIT        2
 #define                        EOP_DATA_SEL_TIMESTAMP          3
+#define                        EOP_DATA_SEL_GDS                5
+#define                EOP_DATA_GDS(dw_offset, num_dwords)     ((dw_offset) | ((unsigned)(num_dwords) << 16))
 /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
  * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
  * DST_SEL=MC. Only CIK chips are affected.
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index abb7057..f1e0fac 100644 (file)
@@ -58,7 +58,9 @@ struct si_multi_fence {
  *
  * \param event                EVENT_TYPE_*
  * \param event_flags  Optional cache flush flags (TC)
- * \param data_sel     1 = fence, 3 = timestamp
+ * \param dst_sel       MEM or TC_L2
+ * \param int_sel       NONE or SEND_DATA_AFTER_WR_CONFIRM
+ * \param data_sel     DISCARD, VALUE_32BIT, TIMESTAMP, or GDS
  * \param buf          Buffer
  * \param va           GPU address
  * \param old_value    Previous fence value (for a bug workaround)
@@ -66,20 +68,18 @@ struct si_multi_fence {
  */
 void si_gfx_write_event_eop(struct si_context *ctx,
                            unsigned event, unsigned event_flags,
-                           unsigned data_sel,
+                           unsigned dst_sel, unsigned int_sel, unsigned data_sel,
                            struct r600_resource *buf, uint64_t va,
                            uint32_t new_fence, unsigned query_type)
 {
        struct radeon_cmdbuf *cs = ctx->gfx_cs;
        unsigned op = EVENT_TYPE(event) |
-                     EVENT_INDEX(5) |
+                     EVENT_INDEX(event == V_028A90_CS_DONE ||
+                                 event == V_028A90_PS_DONE ? 6 : 5) |
                      event_flags;
-       unsigned sel = EOP_DATA_SEL(data_sel);
-
-       /* Wait for write confirmation before writing data, but don't send
-        * an interrupt. */
-       if (data_sel != EOP_DATA_SEL_DISCARD)
-               sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
+       unsigned sel = EOP_DST_SEL(dst_sel) |
+                      EOP_INT_SEL(int_sel) |
+                      EOP_DATA_SEL(data_sel);
 
        if (ctx->chip_class >= GFX9) {
                /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
@@ -275,7 +275,10 @@ static void si_fine_fence_set(struct si_context *ctx,
                radeon_emit(cs, fence_va >> 32);
                radeon_emit(cs, 0x80000000);
        } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
-               si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+               si_gfx_write_event_eop(ctx,
+                                      V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                                      EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                                       EOP_DATA_SEL_VALUE_32BIT,
                                       NULL, fence_va, 0x80000000,
                                       PIPE_QUERY_GPU_FINISHED);
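diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index de71572..f3ef3d2 100644 (file)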
@@ -581,6 +581,8 @@ static void si_pc_emit_stop(struct si_context *sctx,
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
        si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                              EOP_DST_SEL_MEM,
+                              EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                               EOP_DATA_SEL_VALUE_32BIT,
                               buffer, va, 0, SI_NOT_QUERY);
        si_gfx_wait_fence(sctx, va, 0, 0xffffffff);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 29d7e55..73c54df 100644 (file)
@@ -1172,7 +1172,7 @@ void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst
 /* si_fence.c */
 void si_gfx_write_event_eop(struct si_context *ctx,
                            unsigned event, unsigned event_flags,
-                           unsigned data_sel,
+                           unsigned dst_sel, unsigned int_sel, unsigned data_sel,
                            struct r600_resource *buf, uint64_t va,
                            uint32_t new_fence, unsigned query_type);
 unsigned si_gfx_write_fence_dwords(struct si_screen *screen);
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index bdd7e2c..45c8e14 100644 (file)
@@ -891,7 +891,9 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
                /* fall through */
        case PIPE_QUERY_TIMESTAMP:
                si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS,
-                                      0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
+                                      0, EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+                                      EOP_DATA_SEL_TIMESTAMP, NULL, va,
                                       0, query->b.type);
                fence_va = va + 8;
                break;
@@ -913,11 +915,14 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
        radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
                                  RADEON_PRIO_QUERY);
 
-       if (fence_va)
+       if (fence_va) {
                si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                                      EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                                       EOP_DATA_SEL_VALUE_32BIT,
                                       query->buffer.buf, fence_va, 0x80000000,
                                       query->b.type);
+       }
 }
 
 static void si_query_hw_emit_stop(struct si_context *sctx,
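diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c9fb97d..81eb34d 100644 (file)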
@@ -918,8 +918,10 @@ void si_emit_cache_flush(struct si_context *sctx)
 
                        /* Necessary for DCC */
                        if (sctx->chip_class == VI)
-                               si_gfx_write_event_eop(sctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
-                                                      0, EOP_DATA_SEL_DISCARD, NULL,
+                               si_gfx_write_event_eop(sctx,
+                                                      V_028A90_FLUSH_AND_INV_CB_DATA_TS,
+                                                      0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
+                                                      EOP_DATA_SEL_DISCARD, NULL,
                                                       0, 0, SI_NOT_QUERY);
                }
                if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
@@ -1034,6 +1036,8 @@ void si_emit_cache_flush(struct si_context *sctx)
                sctx->wait_mem_number++;
 
                si_gfx_write_event_eop(sctx, cb_db_event, tc_flags,
+                                      EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                                       EOP_DATA_SEL_VALUE_32BIT,
                                       sctx->wait_mem_scratch, va,
                                       sctx->wait_mem_number, SI_NOT_QUERY);