From 85a4c5b35131ab4279101821c060bdd2ce58c3ea Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Sun, 22 May 2022 10:43:42 +0200 Subject: [PATCH] radv: Implement conditional rendering for async compute queue. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit MEC (the compute queue firmware) does not support real predication, so we have to emulate that using COND_EXEC packets before each dispatch. Additionally, COND_EXEC doesn't have an inverted mode, so in order to support inverted mode conditional rendering, we allocate a new piece of memory in which we invert the condition. Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6533 Signed-off-by: Timur Kristóf Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 95 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 58fcb99..d1af39e 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -6113,6 +6113,26 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) } } +/** + * Emulates predication for MEC using COND_EXEC. + * When the current command buffer is predicating, emit a COND_EXEC packet + * so that the MEC skips the next few dwords worth of packets. + */ +static void +radv_cs_emit_compute_predication(struct radv_cmd_buffer *cmd_buffer, unsigned dwords) +{ + if (!cmd_buffer->state.predicating) + return; + + struct radeon_cmdbuf *cs = cmd_buffer->cs; + + radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); + radeon_emit(cs, cmd_buffer->state.predication_va); + radeon_emit(cs, cmd_buffer->state.predication_va >> 32); + radeon_emit(cs, 0); /* Cache policy */ + radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */ +} + static void radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count, uint32_t use_opaque) @@ -7326,7 +7346,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); - ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25); + ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 30); if (compute_shader->info.wave_size == 32) { assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10); @@ -7362,7 +7382,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, } if (radv_cmd_buffer_uses_mec(cmd_buffer)) { - radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) | PKT3_SHADER_TYPE_S(1)); + radv_cs_emit_compute_predication(cmd_buffer, 4 /* DISPATCH_INDIRECT size */); + radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cs, info->va); radeon_emit(cs, info->va >> 32); radeon_emit(cs, dispatch_initiator); @@ -7443,6 +7464,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, dispatch_initiator |= S_00B800_FORCE_START_AT_000(1); } + if (radv_cmd_buffer_uses_mec(cmd_buffer)) { + radv_cs_emit_compute_predication(cmd_buffer, 5 /* DISPATCH_DIRECT size */); + predicating = false; + } + radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cs, blocks[0]); radeon_emit(cs, blocks[1]); @@ -8782,11 +8808,59 @@ radv_CmdBeginConditionalRenderingEXT( pred_op = PREDICATION_OP_BOOL64; } - /* Enable predication for this command buffer. */ - si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va); - cmd_buffer->state.predicating = true; + + if (radv_cmd_buffer_uses_mec(cmd_buffer)) { + /* MEC does not support real predication, so we have to emulate that + * using COND_EXEC packets before each dispatch. + */ + + if (!draw_visible) { + /* COND_EXEC can only skip the next packet(s) when the condition is 0. + * When using inverted conditional rendering, we allocate some space in + * the upload BO and emit some packets to invert the condition. + */ + + uint64_t pred_value = 0, pred_va; + unsigned pred_offset; + + radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset); + pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; + + /* Write 1 to the new predication VA. */ + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, 1); + radeon_emit(cs, 0); + radeon_emit(cs, pred_va); + radeon_emit(cs, pred_va >> 32); + + /* If the API predication VA == 0, skip next command. */ + radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, 0); + radeon_emit(cs, 6); /* 1x COPY_DATA size */ + + /* Write 0 to the new predication VA (when the API condition != 0) */ + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, pred_va); + radeon_emit(cs, pred_va >> 32); + + va = pred_va; + draw_visible = true; + } + } else { + /* Enable predication for this command buffer. */ + si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va); + } /* Store conditional rendering user info. */ + cmd_buffer->state.predicating = true; cmd_buffer->state.predication_type = draw_visible; cmd_buffer->state.predication_op = pred_op; cmd_buffer->state.predication_va = va; @@ -8797,11 +8871,16 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - /* Disable predication for this command buffer. */ - si_emit_set_predication_state(cmd_buffer, false, 0, 0); - cmd_buffer->state.predicating = false; + /* Note, MEC doesn't support predication, so we + * don't have to emit anything for MEC here. + */ + if (!radv_cmd_buffer_uses_mec(cmd_buffer)) { + /* Disable predication for this command buffer. */ + si_emit_set_predication_state(cmd_buffer, false, 0, 0); + } /* Reset conditional rendering user info. */ + cmd_buffer->state.predicating = false; cmd_buffer->state.predication_type = -1; cmd_buffer->state.predication_op = 0; cmd_buffer->state.predication_va = 0; -- 2.7.4