From 02443d752e83d470f400af004e3702fb8f7d8ce3 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 18 Apr 2023 13:49:52 +0200 Subject: [PATCH] radv: delay enabling/disabling occlusion queries at draw time Most applications have a sequence like BeginQuery/Draw/EndQuery, which can be optimized by deferring the DB_COUNT_CONTROL update to draw time instead of enabling/disabling it for every draw. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/meta/radv_meta.c | 11 +++++++---- src/amd/vulkan/meta/radv_meta.h | 1 + src/amd/vulkan/radv_cmd_buffer.c | 25 ++++++++++++++++++++----- src/amd/vulkan/radv_private.h | 4 +++- src/amd/vulkan/radv_query.c | 8 ++++---- 5 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/amd/vulkan/meta/radv_meta.c b/src/amd/vulkan/meta/radv_meta.c index 7586667..f83a7eb 100644 --- a/src/amd/vulkan/meta/radv_meta.c +++ b/src/amd/vulkan/meta/radv_meta.c @@ -50,8 +50,10 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer } /* Occlusion queries. */ - if (cmd_buffer->state.active_occlusion_queries > 0) { - radv_set_db_count_control(cmd_buffer, false); + if (cmd_buffer->state.active_occlusion_queries) { + state->active_occlusion_queries = cmd_buffer->state.active_occlusion_queries; + cmd_buffer->state.active_occlusion_queries = 0; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } /* Primitives generated queries (legacy). */ @@ -88,8 +90,9 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b } /* Occlusion queries. */ - if (cmd_buffer->state.active_occlusion_queries > 0) { - radv_set_db_count_control(cmd_buffer, true); + if (state->active_occlusion_queries) { + cmd_buffer->state.active_occlusion_queries = state->active_occlusion_queries; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } /* Primitives generated queries (legacy). 
*/ diff --git a/src/amd/vulkan/meta/radv_meta.h b/src/amd/vulkan/meta/radv_meta.h index 6f273be..46a582a 100644 --- a/src/amd/vulkan/meta/radv_meta.h +++ b/src/amd/vulkan/meta/radv_meta.h @@ -57,6 +57,7 @@ struct radv_meta_saved_state { unsigned active_pipeline_gds_queries; unsigned active_prims_gen_gds_queries; unsigned active_prims_xfb_gds_queries; + unsigned active_occlusion_queries; bool predicating; }; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 541454c..d8ea47e 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3631,9 +3631,11 @@ radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer, bool indirect) cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER; } -void -radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries) +static void +radv_flush_occlusion_query_state(struct radv_cmd_buffer *cmd_buffer) { + const bool enable_occlusion_queries = cmd_buffer->state.active_occlusion_queries || + cmd_buffer->state.inherited_occlusion_queries; uint32_t db_count_control; if (!enable_occlusion_queries) { @@ -3643,7 +3645,8 @@ radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlus uint32_t sample_rate = util_logbase2(cmd_buffer->state.render.max_samples); bool gfx10_perfect = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && - cmd_buffer->state.perfect_occlusion_queries_enabled; + (cmd_buffer->state.perfect_occlusion_queries_enabled || + cmd_buffer->state.inherited_query_control_flags & VK_QUERY_CONTROL_PRECISE_BIT); if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially @@ -5841,7 +5844,8 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->state.last_pa_sc_binner_cntl_0 = -1; cmd_buffer->usage_flags = pBeginInfo->flags; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL | 
RADV_CMD_DIRTY_GUARDBAND; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_GUARDBAND | + RADV_CMD_DIRTY_OCCLUSION_QUERY; if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { uint32_t pred_value = 0; @@ -5919,6 +5923,12 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi if (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; + + cmd_buffer->state.inherited_occlusion_queries = + pBeginInfo->pInheritanceInfo->occlusionQueryEnable; + cmd_buffer->state.inherited_query_control_flags = pBeginInfo->pInheritanceInfo->queryFlags; + if (cmd_buffer->state.inherited_occlusion_queries) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } if (unlikely(cmd_buffer->device->trace_bo)) @@ -7634,7 +7644,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou */ primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_GUARDBAND | RADV_CMD_DIRTY_DYNAMIC_ALL | - RADV_CMD_DIRTY_NGG_QUERY; + RADV_CMD_DIRTY_NGG_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY; radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS); radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE); @@ -8936,6 +8946,11 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r radv_flush_ngg_query_state(cmd_buffer); } + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_OCCLUSION_QUERY) { + radv_flush_occlusion_query_state(cmd_buffer); + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_OCCLUSION_QUERY; + } + if ((cmd_buffer->state.dirty & (RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 981d669..241b246 100644 --- 
a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1324,6 +1324,7 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_GUARDBAND = 1ull << 53, RADV_CMD_DIRTY_RBPLUS = 1ull << 54, RADV_CMD_DIRTY_NGG_QUERY = 1ull << 55, + RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 56, }; enum radv_cmd_flush_bits { @@ -1639,6 +1640,8 @@ struct radv_cmd_state { /* Inheritance info. */ VkQueryPipelineStatisticFlags inherited_pipeline_statistics; + bool inherited_occlusion_queries; + VkQueryControlFlags inherited_query_control_flags; bool context_roll_without_scissor_emitted; @@ -1901,7 +1904,6 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uin unsigned value); void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); -void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries); uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer); uint32_t radv_get_vgt_index_size(uint32_t type); diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index a13c6d6..fd0e8f3 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1793,7 +1793,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo cmd_buffer->state.perfect_occlusion_queries_enabled = true; } - radv_set_db_count_control(cmd_buffer, true); + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } else { if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) && !cmd_buffer->state.perfect_occlusion_queries_enabled) { @@ -1803,7 +1803,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo */ cmd_buffer->state.perfect_occlusion_queries_enabled = true; - radv_set_db_count_control(cmd_buffer, true); + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } } @@ -1946,12 +1946,12 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, cmd_buffer->state.active_occlusion_queries--; if 
(cmd_buffer->state.active_occlusion_queries == 0) { - radv_set_db_count_control(cmd_buffer, false); - /* Reset the perfect occlusion queries hint now that no * queries are active. */ cmd_buffer->state.perfect_occlusion_queries_enabled = false; + + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 && -- 2.7.4