From cc731821527acd4469775990d24952f40ffe22f4 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Tue, 1 Sep 2020 17:09:19 +0200
Subject: [PATCH] radv: Include flushes in the barrier.

Since the flushes really happen on the next draw, delay the barrier end
to include the flushes.

This fixes the barrier duration in RGP.

Reviewed-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/layers/radv_sqtt_layer.c | 39 +++++++++++++++++++++------------
 src/amd/vulkan/radv_cmd_buffer.c        |  8 +++----
 src/amd/vulkan/radv_private.h           |  2 ++
 src/amd/vulkan/si_cmd_buffer.c          |  6 ++++-
 4 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 2b3f13a..5518026 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -509,41 +509,52 @@ radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
 }
 
 void
-radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
-			    enum rgp_barrier_reason reason)
+radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
 {
-	struct rgp_sqtt_marker_barrier_start marker = {};
+	struct rgp_sqtt_marker_barrier_end marker = {};
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 
-	if (likely(!cmd_buffer->device->thread_trace_bo))
+	if (likely(!cmd_buffer->device->thread_trace_bo) ||
+	    !cmd_buffer->state.pending_sqtt_barrier_end)
 		return;
 
-	marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
+	cmd_buffer->state.pending_sqtt_barrier_end = false;
+
+	marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
 	marker.cb_id = 0;
-	marker.dword02 = reason;
+
+	marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
+
+	/* TODO: fill pipeline stalls, cache flushes, etc */
 
 	radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+
+	cmd_buffer->state.num_layout_transitions = 0;
 }
 
 void
-radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
+radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
+			    enum rgp_barrier_reason reason)
 {
-	struct rgp_sqtt_marker_barrier_end marker = {};
+	struct rgp_sqtt_marker_barrier_start marker = {};
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 
 	if (likely(!cmd_buffer->device->thread_trace_bo))
 		return;
 
-	marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
-	marker.cb_id = 0;
-
-	marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
+	radv_describe_barrier_end_delayed(cmd_buffer);
 
-	/* TODO: fill pipeline stalls, cache flushes, etc */
+	marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
+	marker.cb_id = 0;
+	marker.dword02 = reason;
 
 	radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+}
 
-	cmd_buffer->state.num_layout_transitions = 0;
+void
+radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
+{
+	cmd_buffer->state.pending_sqtt_barrier_end = true;
 }
 
 void
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8023956..243810f 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -5056,6 +5056,8 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 
+	radv_describe_draw(cmd_buffer);
+
 	if (info->indirect) {
 		uint64_t va = radv_buffer_get_va(info->indirect->bo);
 		uint64_t count_va = 0;
@@ -5286,8 +5288,6 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 		return;
 	}
 
-	radv_describe_draw(cmd_buffer);
-
 	/* Use optimal packet order based on whether we need to sync the
 	 * pipeline.
 	 */
@@ -5523,6 +5523,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 	struct radv_userdata_info *loc;
 
+	radv_describe_dispatch(cmd_buffer, 8, 8, 8);
+
 	loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE,
 				    AC_UD_CS_GRID_SIZE);
 
@@ -5663,8 +5665,6 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
 	bool pipeline_is_dirty = pipeline &&
 				 pipeline != cmd_buffer->state.emitted_compute_pipeline;
 
-	radv_describe_dispatch(cmd_buffer, 8, 8, 8);
-
 	if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
 					    RADV_CMD_FLAG_FLUSH_AND_INV_DB |
 					    RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4f954d5..4de20b0 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1369,6 +1369,7 @@ struct radv_cmd_state {
 	uint32_t current_event_type;
 	uint32_t num_events;
 	uint32_t num_layout_transitions;
+	bool pending_sqtt_barrier_end;
 };
 
 struct radv_cmd_pool {
@@ -2551,6 +2552,7 @@ void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
 void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
 				 enum rgp_barrier_reason reason);
 void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
+void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
 void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
 				     const struct radv_barrier_data *barrier);
 
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index cd6cf23..d840457 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1424,8 +1424,10 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 						  RADV_CMD_FLAG_START_PIPELINE_STATS |
 						  RADV_CMD_FLAG_STOP_PIPELINE_STATS);
 
-	if (!cmd_buffer->state.flush_bits)
+	if (!cmd_buffer->state.flush_bits) {
+		radv_describe_barrier_end_delayed(cmd_buffer);
 		return;
+	}
 
 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
 
@@ -1452,6 +1454,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	 * should be finished at this point.
 	 */
 	cmd_buffer->pending_reset_query = false;
+
+	radv_describe_barrier_end_delayed(cmd_buffer);
 }
 
 /* sets the CP predication state using a boolean stored at va */
-- 
2.7.4
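
For readers outside the RADV tree, the following is a minimal standalone sketch of the deferred barrier-end pattern this patch introduces: the barrier-end hook only records that an end marker is pending, and the marker is emitted together with the cache flushes that actually happen on the next draw or dispatch. The struct and function names here (cmd_state, barrier_end, emit_cache_flush) are illustrative only and are not the RADV API.

/* Sketch only, not RADV code: defer the BARRIER_END marker until the cache
 * flushes are emitted, so the barrier duration reported to the profiler
 * covers the flush work as well. */
#include <stdbool.h>
#include <stdio.h>

struct cmd_state {
	bool pending_barrier_end;   /* analogous to pending_sqtt_barrier_end */
	int num_layout_transitions;
};

/* Emit the deferred end marker, if one is pending. */
static void barrier_end_delayed(struct cmd_state *state)
{
	if (!state->pending_barrier_end)
		return;

	state->pending_barrier_end = false;
	printf("BARRIER_END marker (layout transitions: %d)\n",
	       state->num_layout_transitions);
	state->num_layout_transitions = 0;
}

/* Called at the end of the barrier command: only record the pending flag,
 * because the flushes have not been emitted yet. */
static void barrier_end(struct cmd_state *state)
{
	state->pending_barrier_end = true;
}

/* Called right before the next draw/dispatch: emit the flushes, then the
 * delayed end marker, so the marker brackets the flush work. */
static void emit_cache_flush(struct cmd_state *state)
{
	printf("emit flush packets\n");
	barrier_end_delayed(state);
}

int main(void)
{
	struct cmd_state state = { .num_layout_transitions = 1 };

	barrier_end(&state);      /* barrier recorded, marker deferred */
	emit_cache_flush(&state); /* next draw: flushes + BARRIER_END */
	return 0;
}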