From ae61fe4982acf1ba86419beaf757f5f099656a53 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 9 May 2017 08:26:07 +0200 Subject: [PATCH] radv: Implement TC compatible HTILE. The situations where we enable it are quite limited, but it works, even for madmax, so let's just enable it. Reviewed-by: Dave Airlie --- src/amd/vulkan/radv_device.c | 28 ++++++++++++++++++++++++++-- src/amd/vulkan/radv_image.c | 21 +++++++++++++++++++++ src/amd/vulkan/radv_meta_clear.c | 18 ++++++++++++++---- src/amd/vulkan/radv_private.h | 1 + 4 files changed, 62 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 402c948..aa7fe35 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -3249,6 +3249,18 @@ radv_initialise_ds_surface(struct radv_device *device, if (iview->image->surface.htile_size && !level) { ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1); + if (iview->image->tc_compatible_htile) { + unsigned max_zplanes = 4; + + if (iview->vk_format == VK_FORMAT_D16_UNORM && + iview->image->info.samples > 1) + max_zplanes = 2; + + ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) | + S_028038_ITERATE_FLUSH(1); + ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1); + } + if (!iview->image->surface.has_stencil) /* Use all of the htile_buffer for depth if there's no stencil. 
*/ ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1); @@ -3268,7 +3280,7 @@ radv_initialise_ds_surface(struct radv_device *device, z_offs += iview->image->surface.u.legacy.level[level].offset; s_offs += iview->image->surface.u.legacy.stencil_level[level].offset; - ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); + ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile); ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1); ds->db_stencil_info = S_028044_FORMAT(stencil_format); @@ -3312,7 +3324,8 @@ radv_initialise_ds_surface(struct radv_device *device, if (iview->image->surface.htile_size && !level) { ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1); - if (!iview->image->surface.has_stencil) + if (!iview->image->surface.has_stencil && + !iview->image->tc_compatible_htile) /* Use all of the htile_buffer for depth if there's no stencil. */ ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); @@ -3320,6 +3333,17 @@ radv_initialise_ds_surface(struct radv_device *device, iview->image->htile_offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1); + + if (iview->image->tc_compatible_htile) { + ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); + + if (iview->image->info.samples <= 1) + ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); + else if (iview->image->info.samples <= 4) + ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); + else + ds->db_z_info|= S_028040_DECOMPRESS_ON_N_ZPLANES(2); + } } } diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 35c58f4..bf30281 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -109,6 +109,15 @@ radv_init_surface(struct radv_device *device, if (is_depth) { surface->flags |= RADEON_SURF_ZBUFFER; + if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) && + !(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) && + pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR && + 
pCreateInfo->mipLevels <= 1 && + device->physical_device->rad_info.chip_class >= VI && + (pCreateInfo->format == VK_FORMAT_D32_SFLOAT || + (device->physical_device->rad_info.chip_class >= GFX9 && + pCreateInfo->format == VK_FORMAT_D16_UNORM))) + surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; } if (is_stencil) @@ -255,6 +264,11 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, meta_va = gpu_address + image->dcc_offset; if (chip_class <= VI) meta_va += base_level_info->dcc_offset; + } else if(image->tc_compatible_htile && image->surface.htile_size) { + meta_va = gpu_address + image->htile_offset; + } + + if (meta_va) { state[6] |= S_008F28_COMPRESSION_EN(1); state[7] = meta_va >> 8; state[7] |= image->surface.tile_swizzle; @@ -898,6 +912,7 @@ radv_image_create(VkDevice _device, if (radv_image_can_enable_htile(image) && !(device->debug_flags & RADV_DEBUG_NO_HIZ)) { radv_image_alloc_htile(image); + image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE; } else { image->surface.htile_size = 0; } @@ -1040,6 +1055,9 @@ bool radv_layout_has_htile(const struct radv_image *image, VkImageLayout layout, unsigned queue_mask) { + if (image->surface.htile_size && image->tc_compatible_htile) + return layout != VK_IMAGE_LAYOUT_GENERAL; + return image->surface.htile_size && (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) && @@ -1050,6 +1068,9 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image, VkImageLayout layout, unsigned queue_mask) { + if (image->surface.htile_size && image->tc_compatible_htile) + return layout != VK_IMAGE_LAYOUT_GENERAL; + return image->surface.htile_size && (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) && diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 1133024..fd2caf3 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ 
b/src/amd/vulkan/radv_meta_clear.c @@ -543,8 +543,10 @@ create_depthstencil_pipeline(struct radv_device *device, static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, + VkImageAspectFlags aspects, VkImageLayout layout, - const VkClearRect *clear_rect) + const VkClearRect *clear_rect, + VkClearDepthStencilValue clear_value) { uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, @@ -553,7 +555,13 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, clear_rect->rect.extent.width != iview->extent.width || clear_rect->rect.extent.height != iview->extent.height) return false; - if (iview->base_mip == 0 && + if (iview->image->tc_compatible_htile && + (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 && + clear_value.depth != 1.0) || + ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0))) + return false; + if (iview->image->surface.htile_size && + iview->base_mip == 0 && iview->base_layer == 0 && radv_layout_is_htile_compressed(iview->image, layout, queue_mask) && !radv_image_extent_compare(iview->image, &iview->extent)) @@ -571,7 +579,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value) { - bool fast = depth_view_can_fast_clear(cmd_buffer, iview, layout, clear_rect); + bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value); int index = DEPTH_CLEAR_SLOW; if (fast) { @@ -641,7 +649,9 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, pipeline); } - if (depth_view_can_fast_clear(cmd_buffer, iview, subpass->depth_stencil_attachment.layout, clear_rect)) + if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, + subpass->depth_stencil_attachment.layout, + clear_rect, clear_value)) radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects); 
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 5cab407..c2d78a7 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1218,6 +1218,7 @@ struct radv_image { VkDeviceSize offset; uint32_t dcc_offset; uint32_t htile_offset; + bool tc_compatible_htile; struct radeon_surf surface; struct radv_fmask_info fmask; -- 2.7.4