From 107473162e328aae754cd718340933693c839094 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Wed, 18 Oct 2023 14:29:55 +0200 Subject: [PATCH] radv: Refactor WRITE_DATA helper function. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Create a version of this function that takes a CS and queue family. move it to radv_cs.h so it can be called from multiple other files. Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 26 +++++++++----------------- src/amd/vulkan/radv_cs.h | 27 +++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 2 +- src/amd/vulkan/radv_query.c | 12 ++++++------ src/amd/vulkan/si_cmd_buffer.c | 2 +- 5 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 2632f54..8e719f6 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -277,18 +277,10 @@ radv_queue_family_to_ring(const struct radv_physical_device *physical_device, en } static void -radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, unsigned count, - const uint32_t *data) +radv_write_data(struct radv_cmd_buffer *cmd_buffer, const unsigned engine_sel, const uint64_t va, const unsigned count, + const uint32_t *data, const bool predicating) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - - radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); - - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit_array(cs, data, count); + radv_cs_write_data(cmd_buffer->device, cmd_buffer->cs, cmd_buffer->qf, engine_sel, va, count, data, predicating); } static void @@ -296,7 +288,7 @@ radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, ui 
{ uint32_t *zeroes = alloca(size); memset(zeroes, 0, size); - radv_emit_write_data_packet(cmd_buffer, engine_sel, va, size / 4, zeroes); + radv_write_data(cmd_buffer, engine_sel, va, size / 4, zeroes, false); } static void @@ -554,7 +546,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) va += 4; ++cmd_buffer->state.trace_id; - radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id); + radv_write_data(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id, false); radeon_check_space(cmd_buffer->device->ws, cs, 2); @@ -769,7 +761,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pip data[0] = pipeline_address; data[1] = pipeline_address >> 32; - radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); + radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false); } static void @@ -785,7 +777,7 @@ radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr data[0] = vb_ptr; data[1] = vb_ptr >> 32; - radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); + radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false); } static void @@ -802,7 +794,7 @@ radv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader data[0] = prolog_address; data[1] = prolog_address >> 32; - radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); + radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false); } void @@ -832,7 +824,7 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bi data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; } - radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data); + radv_write_data(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data, false); } const struct radv_userdata_info * diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index e8ca11b..25a25ec 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -224,4 +224,31 @@ radv_cp_wait_mem(struct 
radeon_cmdbuf *cs, const enum radv_queue_family qf, cons radeon_emit(cs, 4); /* poll interval */ } +ALWAYS_INLINE static unsigned +radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf, + const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating) +{ + assert(qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE); + + /* Return the correct cdw at the end of the packet so the caller can assert it. */ + const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count); + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, predicating)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + + return cdw_end; +} + +ALWAYS_INLINE static void +radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf, + const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords, + const bool predicating) +{ + ASSERTED const unsigned cdw_end = radv_cs_write_data_head(device, cs, qf, engine_sel, va, count, predicating); + radeon_emit_array(cs, dwords, count); + assert(cs->cdw == cdw_end); +} + #endif /* RADV_CS_H */ diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index a02556a..46f1926 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2068,7 +2068,7 @@ unsigned radv_get_default_max_sample_dist(int log_samples); void radv_device_init_msaa(struct radv_device *device); VkResult radv_device_init_vrs_state(struct radv_device *device); -void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm); +void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm); void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, 
VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects); diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index c6e371b..e1d8206 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1777,11 +1777,11 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo if (cmd_buffer->device->physical_device->use_ngg_streamout) { /* generated prim counter */ gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); - radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); + radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); /* written prim counter */ gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8); - radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000); + radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000); /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; @@ -1802,7 +1802,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { /* On GFX11+, primitives generated query always use GDS. */ gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); - radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); + radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); /* Record that the command buffer needs GDS. 
*/ cmd_buffer->gds_needed = true; @@ -1938,11 +1938,11 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, if (cmd_buffer->device->physical_device->use_ngg_streamout) { /* generated prim counter */ gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); - radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); + radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); /* written prim counter */ gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24); - radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000); + radv_cs_write_data_imm(cs, V_370_ME, va + 28, 0x80000000); cmd_buffer->state.active_prims_xfb_gds_queries--; @@ -1960,7 +1960,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { /* On GFX11+, primitives generated query always use GDS. */ gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); - radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); + radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); cmd_buffer->state.active_prims_gen_gds_queries--; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index bcad170..e633696 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -2010,7 +2010,7 @@ radv_device_init_msaa(struct radv_device *device) } void -radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm) +radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm) { radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel)); -- 2.7.4