From 218ce34962c2b563398a0ffbb3889b23e98a845f Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Thu, 6 Jun 2019 17:30:17 +0200
Subject: [PATCH] radv: add mipmap support for the clear depth/stencil values

Signed-off-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
---
Review note (not part of the commit message):

Each mip level owns an 8-byte slot at clear_value_offset + level * 8,
holding the stencil dword followed by the depth dword.  The combined
single-packet path in radv_set_ds_clear_metadata() must only be taken
when aspects is exactly DEPTH | STENCIL.  Testing with '&' is true when
either bit is set, which makes the per-level single-aspect path
unreachable and lets a depth-only or stencil-only clear overwrite the
stored value of the other aspect.  The condition below uses '=='.

 src/amd/vulkan/radv_cmd_buffer.c | 77 ++++++++++++++++++++++++++--------------
 src/amd/vulkan/radv_private.h    |  9 +++++
 2 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index a19b554..83c6eb0 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1575,34 +1575,50 @@ radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
 static void
 radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 			   struct radv_image *image,
+			   const VkImageSubresourceRange *range,
 			   VkClearDepthStencilValue ds_clear_value,
 			   VkImageAspectFlags aspects)
 {
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
-	uint64_t va = radv_buffer_get_va(image->bo);
-	unsigned reg_offset = 0, reg_count = 0;
+	uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel);
+	uint32_t level_count = radv_get_levelCount(image, range);
 
-	va += image->offset + image->clear_value_offset;
+	if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
+			VK_IMAGE_ASPECT_STENCIL_BIT)) {
+		/* Use the fastest way when both aspects are used. */
+		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating));
+		radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
+				S_370_WR_CONFIRM(1) |
+				S_370_ENGINE_SEL(V_370_PFP));
+		radeon_emit(cs, va);
+		radeon_emit(cs, va >> 32);
 
-	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
-		++reg_count;
+		for (uint32_t l = 0; l < level_count; l++) {
+			radeon_emit(cs, ds_clear_value.stencil);
+			radeon_emit(cs, fui(ds_clear_value.depth));
+		}
 	} else {
-		++reg_offset;
-		va += 4;
-	}
-	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
-		++reg_count;
+		/* Otherwise we need one WRITE_DATA packet per level. */
+		for (uint32_t l = 0; l < level_count; l++) {
+			uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l);
+			unsigned value;
+
+			if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
+				value = fui(ds_clear_value.depth);
+				va += 4;
+			} else {
+				value = ds_clear_value.stencil;
+			}
 
-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, cmd_buffer->state.predicating));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
-			S_370_WR_CONFIRM(1) |
-			S_370_ENGINE_SEL(V_370_PFP));
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
-		radeon_emit(cs, ds_clear_value.stencil);
-	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
-		radeon_emit(cs, fui(ds_clear_value.depth));
+			radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
+			radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
+					S_370_WR_CONFIRM(1) |
+					S_370_ENGINE_SEL(V_370_PFP));
+			radeon_emit(cs, va);
+			radeon_emit(cs, va >> 32);
+			radeon_emit(cs, value);
+		}
+	}
 }
 
 /**
@@ -1665,11 +1681,19 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 			      VkClearDepthStencilValue ds_clear_value,
 			      VkImageAspectFlags aspects)
 {
+	VkImageSubresourceRange range = {
+		.aspectMask = iview->aspect_mask,
+		.baseMipLevel = iview->base_mip,
+		.levelCount = iview->level_count,
+		.baseArrayLayer = iview->base_layer,
+		.layerCount = iview->layer_count,
+	};
 	struct radv_image *image = iview->image;
 
 	assert(radv_image_has_htile(image));
 
-	radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
+	radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range,
+				   ds_clear_value, aspects);
 
 	if (radv_image_is_tc_compat_htile(image) &&
 	    (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
@@ -1686,15 +1710,14 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
  */
 static void
 radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
-			    struct radv_image *image)
+			    const struct radv_image_view *iview)
 {
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	const struct radv_image *image = iview->image;
 	VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
-	uint64_t va = radv_buffer_get_va(image->bo);
+	uint64_t va = radv_get_ds_clear_value_va(image, iview->base_mip);
 	unsigned reg_offset = 0, reg_count = 0;
 
-	va += image->offset + image->clear_value_offset;
-
 	if (!radv_image_has_htile(image))
 		return;
 
@@ -1966,7 +1989,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
 			cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
 			cmd_buffer->state.offset_scale = cmd_buffer->state.attachments[idx].ds.offset_scale;
 		}
-		radv_load_ds_clear_metadata(cmd_buffer, image);
+		radv_load_ds_clear_metadata(cmd_buffer, iview);
 	} else {
 		if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9)
 			radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
@@ -5081,7 +5104,7 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
 	if (vk_format_is_stencil(image->vk_format))
 		aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
 
-	radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
+	radv_set_ds_clear_metadata(cmd_buffer, image, range, value, aspects);
 
 	if (radv_image_is_tc_compat_htile(image)) {
 		/* Initialize the TC-compat metada value to 0 because by
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index fb55219..9bcb2ba 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1814,6 +1814,15 @@ radv_get_tc_compat_zrange_va(const struct radv_image *image,
 	return va;
 }
 
+static inline uint64_t
+radv_get_ds_clear_value_va(const struct radv_image *image,
+			   uint32_t base_level)
+{
+	uint64_t va = radv_buffer_get_va(image->bo);
+	va += image->offset + image->clear_value_offset + base_level * 8;
+	return va;
+}
+
 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family);
 
 static inline uint32_t
-- 
2.7.4