From: Samuel Pitoiset Date: Mon, 12 Dec 2022 15:56:42 +0000 (+0100) Subject: radv: move emitting the strmout buffer in CmdDrawIndirectByteCountEXT() X-Git-Tag: upstream/23.3.3~15126 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6aaba10c6e060a5c2111e700cb7a87d5e39f9fe0;p=platform%2Fupstream%2Fmesa.git radv: move emitting the strmout buffer in CmdDrawIndirectByteCountEXT() This doesn't need to be in the generic draw path because only one draw command uses it. Signed-off-by: Samuel Pitoiset Part-of: --- diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 359ada4..005e0f4 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5058,38 +5058,6 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d } } - if (draw_info->strmout_buffer) { - uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); - - va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset; - - radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride); - - if (info->gfx_level >= GFX10) { - /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption - * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+. - */ - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cs, 0); - - radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, (R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2); - radeon_emit(cs, 1); /* 1 DWORD */ - } else { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); - radeon_emit(cs, 0); /* unused */ - } - - radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); - } - /* RDNA2 is affected by a hardware bug when instance packing is enabled for adjacent primitive * topologies and instance_count > 1, pipeline stats generated by GE are incorrect. It needs to * be applied for indexed and non-indexed draws. @@ -10859,6 +10827,42 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou radv_set_streamout_enable(cmd_buffer, false); } +static void +radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) +{ + const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; + uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); + struct radeon_cmdbuf *cs = cmd_buffer->cs; + + va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset; + + radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride); + + if (gfx_level >= GFX10) { + /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption + * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+. + */ + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); + radeon_emit(cs, 0); + + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, 1); /* 1 DWORD */ + } else { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); + radeon_emit(cs, 0); /* unused */ + } + + radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); +} + VKAPI_ATTR void VKAPI_CALL radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, uint32_t firstInstance, VkBuffer _counterBuffer, @@ -10881,6 +10885,7 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc if (!radv_before_draw(cmd_buffer, &info, 1)) return; struct VkMultiDrawInfoEXT minfo = { 0, 0 }; + radv_emit_strmout_buffer(cmd_buffer, &info); radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, S_0287F0_USE_OPAQUE(1), 0); radv_after_draw(cmd_buffer); }