From 6e1cf6532dc7a498a6a04e9fd5f8147aec0d2030 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 21 Sep 2018 03:36:32 -0400 Subject: [PATCH] radeonsi: make si_gfx_write_event_eop more configurable --- src/amd/common/sid.h | 5 +++++ src/gallium/drivers/radeonsi/si_fence.c | 23 +++++++++++++---------- src/gallium/drivers/radeonsi/si_perfcounter.c | 2 ++ src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_query.c | 9 +++++++-- src/gallium/drivers/radeonsi/si_state_draw.c | 8 ++++++-- 6 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index b3321ea..3e36eb2 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -181,6 +181,9 @@ #define PKT3_COND_WRITE 0x45 #define PKT3_EVENT_WRITE 0x46 #define PKT3_EVENT_WRITE_EOP 0x47 /* not on GFX9 */ +#define EOP_DST_SEL(x) ((x) << 16) +#define EOP_DST_SEL_MEM 0 +#define EOP_DST_SEL_TC_L2 1 #define EOP_INT_SEL(x) ((x) << 24) #define EOP_INT_SEL_NONE 0 #define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3 @@ -189,6 +192,8 @@ #define EOP_DATA_SEL_VALUE_32BIT 1 #define EOP_DATA_SEL_VALUE_64BIT 2 #define EOP_DATA_SEL_TIMESTAMP 3 +#define EOP_DATA_SEL_GDS 5 +#define EOP_DATA_GDS(dw_offset, num_dwords) ((dw_offset) | ((unsigned)(num_dwords) << 16)) /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and * DST_SEL=MC. Only CIK chips are affected. diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index abb7057..f1e0fac 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -58,7 +58,9 @@ struct si_multi_fence { * * \param event EVENT_TYPE_* * \param event_flags Optional cache flush flags (TC) - * \param data_sel 1 = fence, 3 = timestamp + * \param dst_sel MEM or TC_L2 + * \param int_sel NONE or SEND_DATA_AFTER_WR_CONFIRM + * \param data_sel DISCARD, VALUE_32BIT, TIMESTAMP, or GDS * \param buf Buffer * \param va GPU address * \param old_value Previous fence value (for a bug workaround) @@ -66,20 +68,18 @@ struct si_multi_fence { */ void si_gfx_write_event_eop(struct si_context *ctx, unsigned event, unsigned event_flags, - unsigned data_sel, + unsigned dst_sel, unsigned int_sel, unsigned data_sel, struct r600_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type) { struct radeon_cmdbuf *cs = ctx->gfx_cs; unsigned op = EVENT_TYPE(event) | - EVENT_INDEX(5) | + EVENT_INDEX(event == V_028A90_CS_DONE || + event == V_028A90_PS_DONE ? 6 : 5) | event_flags; - unsigned sel = EOP_DATA_SEL(data_sel); - - /* Wait for write confirmation before writing data, but don't send - * an interrupt. */ - if (data_sel != EOP_DATA_SEL_DISCARD) - sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM); + unsigned sel = EOP_DST_SEL(dst_sel) | + EOP_INT_SEL(int_sel) | + EOP_DATA_SEL(data_sel); if (ctx->chip_class >= GFX9) { /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion @@ -275,7 +275,10 @@ static void si_fine_fence_set(struct si_context *ctx, radeon_emit(cs, fence_va >> 32); radeon_emit(cs, 0x80000000); } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { - si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + si_gfx_write_event_eop(ctx, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, NULL, fence_va, 0x80000000, PIPE_QUERY_GPU_FINISHED); diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index de71572..f3ef3d2 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -581,6 +581,8 @@ static void si_pc_emit_stop(struct si_context *sctx, struct radeon_cmdbuf *cs = sctx->gfx_cs; si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); si_gfx_wait_fence(sctx, va, 0, 0xffffffff); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 29d7e55..73c54df 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1172,7 +1172,7 @@ void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst /* si_fence.c */ void si_gfx_write_event_eop(struct si_context *ctx, unsigned event, unsigned event_flags, - unsigned data_sel, + unsigned dst_sel, unsigned int_sel, unsigned data_sel, struct r600_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type); unsigned si_gfx_write_fence_dwords(struct si_screen *screen); diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index bdd7e2c..45c8e14 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -891,7 +891,9 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, /* fall through */ case PIPE_QUERY_TIMESTAMP: si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DATA_SEL_TIMESTAMP, NULL, va, + 0, EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type); fence_va = va + 8; break; @@ -913,11 +915,14 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - if (fence_va) + if (fence_va) { si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, query->buffer.buf, fence_va, 0x80000000, query->b.type); + } } static void si_query_hw_emit_stop(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c9fb97d..81eb34d 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -918,8 +918,10 @@ void si_emit_cache_flush(struct si_context *sctx) /* Necessary for DCC */ if (sctx->chip_class == VI) - si_gfx_write_event_eop(sctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS, - 0, EOP_DATA_SEL_DISCARD, NULL, + si_gfx_write_event_eop(sctx, + V_028A90_FLUSH_AND_INV_CB_DATA_TS, + 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, + EOP_DATA_SEL_DISCARD, NULL, 0, 0, SI_NOT_QUERY); } if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) @@ -1034,6 +1036,8 @@ void si_emit_cache_flush(struct si_context *sctx) sctx->wait_mem_number++; si_gfx_write_event_eop(sctx, cb_db_event, tc_flags, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, sctx->wait_mem_scratch, va, sctx->wait_mem_number, SI_NOT_QUERY); -- 2.7.4