From e490434479ac49c157b338a40ad9ca4c9f0a70f0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 2 Sep 2022 23:12:00 -0500 Subject: [PATCH] hasvk: Drop CCS_E support Oh, for the days of Broadwell and earlier where compression was called fast-clear. That was a simpler time. The birds sang in the trees, the oceans weren't brown from oil spills, and Intel surface compression was actually comprehensible by humans. To help the reviewer, keep the following in mind: 1. CCS_E is SKL+ 2. Implicit CCS is TGL+ 3. The AUX TT (AKA aux map) is TGL+ 4. HIZ+CCS, stencil CCS, and CCS for storage images are all TGL+ 5. CCS_D surfaces only ever get full resolves and MCS surfaces only ever get partial resolves Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/vulkan_hasvk/anv_allocator.c | 57 +----- src/intel/vulkan_hasvk/anv_blorp.c | 19 -- src/intel/vulkan_hasvk/anv_device.c | 68 +------ src/intel/vulkan_hasvk/anv_formats.c | 12 -- src/intel/vulkan_hasvk/anv_image.c | 319 ++----------------------------- src/intel/vulkan_hasvk/anv_private.h | 88 --------- src/intel/vulkan_hasvk/genX_cmd_buffer.c | 96 +--------- 7 files changed, 22 insertions(+), 637 deletions(-) diff --git a/src/intel/vulkan_hasvk/anv_allocator.c b/src/intel/vulkan_hasvk/anv_allocator.c index 8c4ffbf..9e9b8cc 100644 --- a/src/intel/vulkan_hasvk/anv_allocator.c +++ b/src/intel/vulkan_hasvk/anv_allocator.c @@ -29,7 +29,6 @@ #include "anv_private.h" -#include "common/intel_aux_map.h" #include "util/anon_file.h" #include "util/futex.h" @@ -1611,7 +1610,7 @@ static void anv_bo_finish(struct anv_device *device, struct anv_bo *bo) { if (bo->offset != 0 && anv_bo_is_pinned(bo) && !bo->has_fixed_address) - anv_vma_free(device, bo->offset, bo->size + bo->_ccs_size); + anv_vma_free(device, bo->offset, bo->size); if (bo->map && !bo->from_host_ptr) anv_device_unmap_bo(device, bo, bo->map, bo->size); @@ -1631,21 +1630,11 @@ anv_bo_vma_alloc_or_close(struct anv_device *device, uint32_t align = 4096; - /* Gen12 CCS surface 
addresses need to be 64K aligned. */ - if (device->info->ver >= 12 && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS)) - align = 64 * 1024; - - /* For XeHP, lmem and smem cannot share a single PDE, which means they - * can't live in the same 2MiB aligned region. - */ - if (device->info->verx10 >= 125) - align = 2 * 1024 * 1024; - if (alloc_flags & ANV_BO_ALLOC_FIXED_ADDRESS) { bo->has_fixed_address = true; bo->offset = explicit_address; } else { - bo->offset = anv_vma_alloc(device, bo->size + bo->_ccs_size, + bo->offset = anv_vma_alloc(device, bo->size, align, alloc_flags, explicit_address); if (bo->offset == 0) { anv_bo_finish(device, bo); @@ -1665,9 +1654,6 @@ anv_device_alloc_bo(struct anv_device *device, uint64_t explicit_address, struct anv_bo **bo_out) { - if (!device->physical->has_implicit_ccs) - assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS)); - const uint32_t bo_flags = anv_bo_alloc_flags_to_bo_flags(device, alloc_flags); assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS)); @@ -1675,18 +1661,7 @@ anv_device_alloc_bo(struct anv_device *device, /* The kernel is going to give us whole pages anyway */ size = align_u64(size, 4096); - uint64_t ccs_size = 0; - if (device->info->has_aux_map && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS)) { - /* Align the size up to the next multiple of 64K so we don't have any - * AUX-TT entries pointing from a 64K page to itself. 
- */ - size = align_u64(size, 64 * 1024); - - /* See anv_bo::_ccs_size */ - ccs_size = align_u64(DIV_ROUND_UP(size, INTEL_AUX_MAP_GFX12_CCS_SCALE), 4096); - } - - uint32_t gem_handle = anv_gem_create(device, size + ccs_size); + uint32_t gem_handle = anv_gem_create(device, size); if (gem_handle == 0) return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -1696,12 +1671,10 @@ anv_device_alloc_bo(struct anv_device *device, .refcount = 1, .offset = -1, .size = size, - ._ccs_size = ccs_size, .flags = bo_flags, .is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL), .has_client_visible_address = (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0, - .has_implicit_ccs = ccs_size > 0 || device->info->verx10 >= 125, }; if (alloc_flags & ANV_BO_ALLOC_MAPPED) { @@ -1743,14 +1716,6 @@ anv_device_alloc_bo(struct anv_device *device, assert(!new_bo.has_client_visible_address); } - if (new_bo._ccs_size > 0) { - assert(device->info->has_aux_map); - intel_aux_map_add_mapping(device->aux_map_ctx, - intel_canonical_address(new_bo.offset), - intel_canonical_address(new_bo.offset + new_bo.size), - new_bo.size, 0 /* format_bits */); - } - assert(new_bo.gem_handle); /* If we just got this gem_handle from anv_bo_init_new then we know no one @@ -1808,9 +1773,6 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device, ANV_BO_ALLOC_SNOOPED | ANV_BO_ALLOC_FIXED_ADDRESS))); - assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS) || - (device->physical->has_implicit_ccs && device->info->has_aux_map)); - struct anv_bo_cache *cache = &device->bo_cache; const uint32_t bo_flags = anv_bo_alloc_flags_to_bo_flags(device, alloc_flags); @@ -1898,9 +1860,6 @@ anv_device_import_bo(struct anv_device *device, ANV_BO_ALLOC_SNOOPED | ANV_BO_ALLOC_FIXED_ADDRESS))); - assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS) || - (device->physical->has_implicit_ccs && device->info->has_aux_map)); - struct anv_bo_cache *cache = &device->bo_cache; const uint32_t bo_flags = anv_bo_alloc_flags_to_bo_flags(device, 
alloc_flags); @@ -1994,7 +1953,6 @@ anv_device_import_bo(struct anv_device *device, }; if (anv_bo_is_pinned(&new_bo)) { - assert(new_bo._ccs_size == 0); VkResult result = anv_bo_vma_alloc_or_close(device, &new_bo, alloc_flags, client_address); @@ -2114,15 +2072,6 @@ anv_device_release_bo(struct anv_device *device, } assert(bo->refcount == 0); - if (bo->_ccs_size > 0) { - assert(device->physical->has_implicit_ccs); - assert(device->info->has_aux_map); - assert(bo->has_implicit_ccs); - intel_aux_map_unmap_range(device->aux_map_ctx, - intel_canonical_address(bo->offset), - bo->size); - } - /* Memset the BO just in case. The refcount being zero should be enough to * prevent someone from assuming the data is valid but it's safer to just * stomp to zero just in case. We explicitly do this *before* we actually diff --git a/src/intel/vulkan_hasvk/anv_blorp.c b/src/intel/vulkan_hasvk/anv_blorp.c index 73d310b..209e332 100644 --- a/src/intel/vulkan_hasvk/anv_blorp.c +++ b/src/intel/vulkan_hasvk/anv_blorp.c @@ -1737,25 +1737,6 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, ANV_PIPE_DEPTH_STALL_BIT, "before clear hiz"); - if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) { - /* From Bspec 47010 (Depth Buffer Clear): - * - * Since the fast clear cycles to CCS are not cached in TileCache, - * any previous depth buffer writes to overlapping pixels must be - * flushed out of TileCache before a succeeding Depth Buffer Clear. - * This restriction only applies to Depth Buffer with write-thru - * enabled, since fast clears to CCS only occur for write-thru mode. - * - * There may have been a write to this depth buffer. Flush it from the - * tile cache just in case. 
- */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | - ANV_PIPE_TILE_CACHE_FLUSH_BIT, - "before clear hiz_ccs_wt"); - } - blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil, level, base_layer, layer_count, area.offset.x, area.offset.y, diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index 489b907..3e24468 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -54,7 +54,6 @@ #include "vk_util.h" #include "vk_deferred_operation.h" #include "vk_drm_syncobj.h" -#include "common/intel_aux_map.h" #include "common/intel_defines.h" #include "common/intel_uuid.h" #include "perf/intel_perf.h" @@ -869,9 +868,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, */ device->has_bindless_samplers = device->info.ver >= 8; - device->has_implicit_ccs = device->info.has_aux_map || - device->info.verx10 >= 125; - /* Check if we can read the GPU timestamp register from the CPU */ uint64_t u64_ignore; device->has_reg_timestamp = intel_gem_read_render_timestamp(fd, &u64_ignore); @@ -2735,47 +2731,6 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) return (struct intel_batch_decode_bo) { }; } -struct intel_aux_map_buffer { - struct intel_buffer base; - struct anv_state state; -}; - -static struct intel_buffer * -intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size) -{ - struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer)); - if (!buf) - return NULL; - - struct anv_device *device = (struct anv_device*)driver_ctx; - assert(device->physical->supports_48bit_addresses && - device->physical->use_softpin); - - struct anv_state_pool *pool = &device->dynamic_state_pool; - buf->state = anv_state_pool_alloc(pool, size, size); - - buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset; - buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size; - buf->base.map = buf->state.map; - buf->base.driver_bo = &buf->state; - return 
&buf->base; -} - -static void -intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer) -{ - struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer; - struct anv_device *device = (struct anv_device*)driver_ctx; - struct anv_state_pool *pool = &device->dynamic_state_pool; - anv_state_pool_free(pool, buf->state); - free(buf); -} - -static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = { - .alloc = intel_aux_map_buffer_alloc, - .free = intel_aux_map_buffer_free, -}; - static VkResult anv_device_check_status(struct vk_device *vk_device); static VkResult @@ -3114,20 +3069,13 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_surface_state_pool; - if (device->info->has_aux_map) { - device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator, - &physical_device->info); - if (!device->aux_map_ctx) - goto fail_binding_table_pool; - } - result = anv_device_alloc_bo(device, "workaround", 4096, ANV_BO_ALLOC_CAPTURE | ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */, &device->workaround_bo); if (result != VK_SUCCESS) - goto fail_surface_aux_map_pool; + goto fail_binding_table_pool; device->workaround_address = (struct anv_address) { .bo = device->workaround_bo, @@ -3204,11 +3152,6 @@ VkResult anv_CreateDevice( anv_device_release_bo(device, device->trivial_batch_bo); fail_workaround_bo: anv_device_release_bo(device, device->workaround_bo); - fail_surface_aux_map_pool: - if (device->info->has_aux_map) { - intel_aux_map_finish(device->aux_map_ctx); - device->aux_map_ctx = NULL; - } fail_binding_table_pool: if (!anv_use_relocations(physical_device)) anv_state_pool_finish(&device->binding_table_pool); @@ -3282,11 +3225,6 @@ void anv_DestroyDevice( anv_device_release_bo(device, device->workaround_bo); anv_device_release_bo(device, device->trivial_batch_bo); - if (device->info->has_aux_map) { - intel_aux_map_finish(device->aux_map_ctx); - device->aux_map_ctx = NULL; - } - if (!anv_use_relocations(device->physical)) 
anv_state_pool_finish(&device->binding_table_pool); anv_state_pool_finish(&device->surface_state_pool); @@ -3532,10 +3470,6 @@ VkResult anv_AllocateMemory( } } - /* By default, we want all VkDeviceMemory objects to support CCS */ - if (device->physical->has_implicit_ccs && device->info->has_aux_map) - alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS; - if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT) alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS; diff --git a/src/intel/vulkan_hasvk/anv_formats.c b/src/intel/vulkan_hasvk/anv_formats.c index 872fe62..2dba195 100644 --- a/src/intel/vulkan_hasvk/anv_formats.c +++ b/src/intel/vulkan_hasvk/anv_formats.c @@ -795,11 +795,6 @@ anv_get_image_format_features2(const struct intel_device_info *devinfo, } } - if (isl_mod_info->aux_usage == ISL_AUX_USAGE_CCS_E && - !isl_format_supports_ccs_e(devinfo, plane_format.isl_format)) { - return 0; - } - if (isl_mod_info->aux_usage != ISL_AUX_USAGE_NONE) { /* Rejection DISJOINT for consistency with the GL driver. In * eglCreateImage, we require that the dma_buf for the primary surface @@ -1120,13 +1115,6 @@ anv_get_image_format_properties( maxArraySize = 1; maxMipLevels = 1; sampleCounts = VK_SAMPLE_COUNT_1_BIT; - - if (isl_mod_info->aux_usage == ISL_AUX_USAGE_CCS_E && - !anv_formats_ccs_e_compatible(devinfo, info->flags, info->format, - info->tiling, image_usage, - format_list_info)) { - goto unsupported; - } } /* Our hardware doesn't support 1D compressed textures. diff --git a/src/intel/vulkan_hasvk/anv_image.c b/src/intel/vulkan_hasvk/anv_image.c index f10f464..a065330 100644 --- a/src/intel/vulkan_hasvk/anv_image.c +++ b/src/intel/vulkan_hasvk/anv_image.c @@ -381,17 +381,6 @@ can_fast_clear_with_non_zero_color(const struct intel_device_info *devinfo, if (!isl_aux_usage_has_fast_clears(image->planes[plane].aux_usage)) return false; - /* On TGL, if a block of fragment shader outputs match the surface's clear - * color, the HW may convert them to fast-clears (see HSD 14010672564). 
- * This can lead to rendering corruptions if not handled properly. We - * restrict the clear color to zero to avoid issues that can occur with: - * - Texture view rendering (including blorp_copy calls) - * - Images with multiple levels or array layers - */ - if (devinfo->ver >= 12 && - image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) - return false; - /* Non mutable image, we can fast clear with any color supported by HW. */ if (!(image->vk.create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) @@ -439,46 +428,6 @@ can_fast_clear_with_non_zero_color(const struct intel_device_info *devinfo, return true; } -/** - * Return true if the storage image could be used with atomics. - * - * If the image was created with an explicit format, we check it for typed - * atomic support. If MUTABLE_FORMAT_BIT is set, then we check the optional - * format list, seeing if /any/ of the formats support typed atomics. If no - * list is supplied, we fall back to using the bpb, as the application could - * make an image view with a format that does use atomics. - */ -static bool -storage_image_format_supports_atomic(const struct intel_device_info *devinfo, - VkImageCreateFlags create_flags, - enum isl_format format, - VkImageTiling vk_tiling, - const VkImageFormatListCreateInfo *fmt_list) -{ - if (isl_format_supports_typed_atomics(devinfo, format)) - return true; - - if (!(create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) - return false; - - if (fmt_list) { - for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) { - enum isl_format view_format = - anv_get_isl_format(devinfo, fmt_list->pViewFormats[i], - VK_IMAGE_ASPECT_COLOR_BIT, vk_tiling); - - if (isl_format_supports_typed_atomics(devinfo, view_format)) - return true; - } - - return false; - } - - /* No explicit format list. Any 16/32/64bpp format could be used with atomics. 
*/ - unsigned bpb = isl_format_get_layout(format)->bpb; - return bpb == 16 || bpb == 32 || bpb == 64; -} - static enum isl_format anv_get_isl_format_with_usage(const struct intel_device_info *devinfo, VkFormat vk_format, @@ -510,82 +459,6 @@ anv_get_isl_format_with_usage(const struct intel_device_info *devinfo, return format.isl_format; } -static bool -formats_ccs_e_compatible(const struct intel_device_info *devinfo, - VkImageCreateFlags create_flags, - enum isl_format format, VkImageTiling vk_tiling, - VkImageUsageFlags vk_usage, - const VkImageFormatListCreateInfo *fmt_list) -{ - if (!isl_format_supports_ccs_e(devinfo, format)) - return false; - - /* For images created without MUTABLE_FORMAT_BIT set, we know that they will - * always be used with the original format. In particular, they will always - * be used with a format that supports color compression. - */ - if (!(create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) - return true; - - if (!fmt_list || fmt_list->viewFormatCount == 0) - return false; - - for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) { - enum isl_format view_format = - anv_get_isl_format_with_usage(devinfo, fmt_list->pViewFormats[i], - VK_IMAGE_ASPECT_COLOR_BIT, vk_usage, - vk_tiling); - - if (!isl_formats_are_ccs_e_compatible(devinfo, format, view_format)) - return false; - } - - return true; -} - -bool -anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo, - VkImageCreateFlags create_flags, - VkFormat vk_format, VkImageTiling vk_tiling, - VkImageUsageFlags vk_usage, - const VkImageFormatListCreateInfo *fmt_list) -{ - enum isl_format format = - anv_get_isl_format_with_usage(devinfo, vk_format, - VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_USAGE_SAMPLED_BIT, vk_tiling); - - if (!formats_ccs_e_compatible(devinfo, create_flags, format, vk_tiling, - VK_IMAGE_USAGE_SAMPLED_BIT, fmt_list)) - return false; - - if (vk_usage & VK_IMAGE_USAGE_STORAGE_BIT) { - if (devinfo->verx10 < 125) - return false; - - enum isl_format lower_format 
= - anv_get_isl_format_with_usage(devinfo, vk_format, - VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_USAGE_STORAGE_BIT, vk_tiling); - - if (!isl_formats_are_ccs_e_compatible(devinfo, format, lower_format)) - return false; - - if (!formats_ccs_e_compatible(devinfo, create_flags, format, vk_tiling, - VK_IMAGE_USAGE_STORAGE_BIT, fmt_list)) - return false; - - /* Disable compression when surface can be potentially used for atomic - * operation. - */ - if (storage_image_format_supports_atomic(devinfo, create_flags, format, - vk_tiling, fmt_list)) - return false; - } - - return true; -} - /** * For color images that have an auxiliary surface, request allocation for an * additional buffer that mainly stores fast-clear values. Use of this buffer @@ -657,16 +530,6 @@ add_aux_state_tracking_buffer(struct anv_device *device, /* Clear color and fast clear type */ unsigned state_size = clear_color_state_size + 4; - /* We only need to track compression on CCS_E surfaces. */ - if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { - if (image->vk.image_type == VK_IMAGE_TYPE_3D) { - for (uint32_t l = 0; l < image->vk.mip_levels; l++) - state_size += anv_minify(image->vk.extent.depth, l) * 4; - } else { - state_size += image->vk.mip_levels * image->vk.array_layers * 4; - } - } - enum anv_image_memory_binding binding = ANV_IMAGE_MEMORY_BINDING_PLANE_0 + plane; @@ -755,48 +618,6 @@ add_aux_surface_if_supported(struct anv_device *device, &image->planes[plane].aux_surface.isl); if (!ok) return VK_SUCCESS; - - if (!isl_surf_supports_ccs(&device->isl_dev, - &image->planes[plane].primary_surface.isl, - &image->planes[plane].aux_surface.isl)) { - image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ; - } else if (image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && - image->vk.samples == 1) { - /* If it's used as an input attachment or a texture and it's - * single-sampled (this is a requirement for HiZ+CCS write-through - * mode), use write-through mode so 
that we don't need to resolve - * before texturing. This will make depth testing a bit slower but - * texturing faster. - * - * TODO: This is a heuristic trade-off; we haven't tuned it at all. - */ - assert(device->info->ver >= 12); - image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ_CCS_WT; - } else { - assert(device->info->ver >= 12); - image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ_CCS; - } - - result = add_surface(device, image, &image->planes[plane].aux_surface, - ANV_IMAGE_MEMORY_BINDING_PLANE_0 + plane, - ANV_OFFSET_IMPLICIT); - if (result != VK_SUCCESS) - return result; - - if (image->planes[plane].aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) - return add_aux_state_tracking_buffer(device, image, plane); - } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { - - if (INTEL_DEBUG(DEBUG_NO_CCS)) - return VK_SUCCESS; - - if (!isl_surf_supports_ccs(&device->isl_dev, - &image->planes[plane].primary_surface.isl, - NULL)) - return VK_SUCCESS; - - image->planes[plane].aux_usage = ISL_AUX_USAGE_STC_CCS; } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && image->vk.samples == 1) { if (image->n_planes != 1) { /* Multiplanar images seem to hit a sampler bug with CCS and R16G16 @@ -829,34 +650,18 @@ add_aux_surface_if_supported(struct anv_device *device, if (!ok) return VK_SUCCESS; - /* Choose aux usage */ - if (anv_formats_ccs_e_compatible(device->info, image->vk.create_flags, - image->vk.format, image->vk.tiling, - image->vk.usage, fmt_list)) { - image->planes[plane].aux_usage = ISL_AUX_USAGE_CCS_E; - } else if (device->info->ver >= 12) { - anv_perf_warn(VK_LOG_OBJS(&image->vk.base), - "The CCS_D aux mode is not yet handled on " - "Gfx12+. 
Not allocating a CCS buffer."); - image->planes[plane].aux_surface.isl.size_B = 0; - return VK_SUCCESS; - } else { - image->planes[plane].aux_usage = ISL_AUX_USAGE_CCS_D; - } + image->planes[plane].aux_usage = ISL_AUX_USAGE_CCS_D; - if (!device->physical->has_implicit_ccs) { - enum anv_image_memory_binding binding = - ANV_IMAGE_MEMORY_BINDING_PLANE_0 + plane; + enum anv_image_memory_binding binding = + ANV_IMAGE_MEMORY_BINDING_PLANE_0 + plane; - if (image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID && - !isl_drm_modifier_has_aux(image->vk.drm_format_mod)) - binding = ANV_IMAGE_MEMORY_BINDING_PRIVATE; + if (image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID) + binding = ANV_IMAGE_MEMORY_BINDING_PRIVATE; - result = add_surface(device, image, &image->planes[plane].aux_surface, - binding, offset); - if (result != VK_SUCCESS) - return result; - } + result = add_surface(device, image, &image->planes[plane].aux_surface, + binding, offset); + if (result != VK_SUCCESS) + return result; return add_aux_state_tracking_buffer(device, image, plane); } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && image->vk.samples > 1) { @@ -1955,38 +1760,6 @@ VkResult anv_BindImageMemory2( did_bind = true; } - - /* On platforms that use implicit CCS, if the plane's bo lacks implicit - * CCS then disable compression on the plane. - */ - for (int p = 0; p < image->n_planes; ++p) { - enum anv_image_memory_binding binding = - image->planes[p].primary_surface.memory_range.binding; - const struct anv_bo *bo = - image->bindings[binding].address.bo; - - if (!bo || bo->has_implicit_ccs) - continue; - - if (!device->physical->has_implicit_ccs) - continue; - - if (!isl_aux_usage_has_ccs(image->planes[p].aux_usage)) - continue; - - anv_perf_warn(VK_LOG_OBJS(&image->vk.base), - "BO lacks implicit CCS. 
Disabling the CCS aux usage."); - - if (image->planes[p].aux_surface.memory_range.size > 0) { - assert(image->planes[p].aux_usage == ISL_AUX_USAGE_HIZ_CCS || - image->planes[p].aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT); - image->planes[p].aux_usage = ISL_AUX_USAGE_HIZ; - } else { - assert(image->planes[p].aux_usage == ISL_AUX_USAGE_CCS_E || - image->planes[p].aux_usage == ISL_AUX_USAGE_STC_CCS); - image->planes[p].aux_usage = ISL_AUX_USAGE_NONE; - } - } } return VK_SUCCESS; @@ -2205,14 +1978,6 @@ anv_layout_to_aux_state(const struct intel_device_info * const devinfo, } break; - case ISL_AUX_USAGE_HIZ_CCS: - aux_supported = false; - clear_supported = false; - break; - - case ISL_AUX_USAGE_HIZ_CCS_WT: - break; - case ISL_AUX_USAGE_CCS_D: aux_supported = false; clear_supported = false; @@ -2223,28 +1988,12 @@ anv_layout_to_aux_state(const struct intel_device_info * const devinfo, clear_supported = false; break; - case ISL_AUX_USAGE_CCS_E: - case ISL_AUX_USAGE_STC_CCS: - break; - default: unreachable("Unsupported aux usage"); } } switch (aux_usage) { - case ISL_AUX_USAGE_HIZ: - case ISL_AUX_USAGE_HIZ_CCS: - case ISL_AUX_USAGE_HIZ_CCS_WT: - if (aux_supported) { - assert(clear_supported); - return ISL_AUX_STATE_COMPRESSED_CLEAR; - } else if (read_only) { - return ISL_AUX_STATE_RESOLVED; - } else { - return ISL_AUX_STATE_AUX_INVALID; - } - case ISL_AUX_USAGE_CCS_D: /* We only support clear in exactly one state */ if (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { @@ -2255,14 +2004,6 @@ anv_layout_to_aux_state(const struct intel_device_info * const devinfo, return ISL_AUX_STATE_PASS_THROUGH; } - case ISL_AUX_USAGE_CCS_E: - if (aux_supported) { - assert(clear_supported); - return ISL_AUX_STATE_COMPRESSED_CLEAR; - } else { - return ISL_AUX_STATE_PASS_THROUGH; - } - case ISL_AUX_USAGE_MCS: assert(aux_supported); if (clear_supported) { @@ -2271,11 +2012,6 @@ anv_layout_to_aux_state(const struct intel_device_info * const devinfo, return ISL_AUX_STATE_COMPRESSED_NO_CLEAR; } 
- case ISL_AUX_USAGE_STC_CCS: - assert(aux_supported); - assert(!clear_supported); - return ISL_AUX_STATE_COMPRESSED_NO_CLEAR; - default: unreachable("Unsupported aux usage"); } @@ -2404,26 +2140,13 @@ anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo, * must get partially resolved before we leave the render pass. */ return ANV_FAST_CLEAR_ANY; - } else if (image->planes[plane].aux_usage == ISL_AUX_USAGE_MCS || - image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { - if (devinfo->ver >= 11) { - /* The image might not support non zero fast clears when mutable. */ - if (!image->planes[plane].can_non_zero_fast_clear) - return ANV_FAST_CLEAR_DEFAULT_VALUE; - - /* On ICL and later, the sampler hardware uses a copy of the clear - * value that is encoded as a pixel value. Therefore, we can use - * any clear color we like for sampling. - */ - return ANV_FAST_CLEAR_ANY; - } else { - /* If the image has MCS or CCS_E enabled all the time then we can - * use fast-clear as long as the clear color is the default value - * of zero since this is the default value we program into every - * surface state used for texturing. - */ - return ANV_FAST_CLEAR_DEFAULT_VALUE; - } + } else if (image->planes[plane].aux_usage == ISL_AUX_USAGE_MCS) { + /* If the image has MCS enabled all the time then we can + * use fast-clear as long as the clear color is the default value + * of zero since this is the default value we program into every + * surface state used for texturing. + */ + return ANV_FAST_CLEAR_DEFAULT_VALUE; } else { return ANV_FAST_CLEAR_NONE; } @@ -2516,8 +2239,6 @@ anv_image_fill_surface_state(struct anv_device *device, * value (SKL+), define the clear value to the optimal constant. 
*/ union isl_color_value default_clear_color = { .u32 = { 0, } }; - if (device->info->ver >= 9 && aspect == VK_IMAGE_ASPECT_DEPTH_BIT) - default_clear_color.f32[0] = ANV_HZ_FC_VAL; if (!clear_color) clear_color = &default_clear_color; @@ -2552,13 +2273,6 @@ anv_image_fill_surface_state(struct anv_device *device, */ enum isl_format lower_format = isl_lower_storage_image_format(device->info, view.format); - if (aux_usage != ISL_AUX_USAGE_NONE) { - assert(device->info->verx10 >= 125); - assert(aux_usage == ISL_AUX_USAGE_CCS_E); - assert(isl_formats_are_ccs_e_compatible(device->info, - view.format, - lower_format)); - } /* If we lower the format, we should ensure either they both match in * bits per channel or that there is no swizzle, because we can't use @@ -2805,7 +2519,6 @@ anv_CreateImageView(VkDevice _device, general_aux_usage, NULL, ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED, &iview->planes[vplane].lowered_storage_surface_state, - device->info->ver >= 9 ? NULL : &iview->planes[vplane].lowered_storage_image_param); } else { /* In this case, we support the format but, because there's no diff --git a/src/intel/vulkan_hasvk/anv_private.h b/src/intel/vulkan_hasvk/anv_private.h index a014b31..9b1b277 100644 --- a/src/intel/vulkan_hasvk/anv_private.h +++ b/src/intel/vulkan_hasvk/anv_private.h @@ -100,7 +100,6 @@ struct anv_buffer_view; struct anv_image_view; struct anv_instance; -struct intel_aux_map_context; struct intel_perf_config; struct intel_perf_counter_pass; struct intel_perf_query_result; @@ -474,30 +473,6 @@ struct anv_bo { */ void *map; - /** Size of the implicit CCS range at the end of the buffer - * - * On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K - * page of main surface data maps to a 256B chunk of CCS data and that - * mapping is provided on TGL-LP by the AUX table which maps virtual memory - * addresses in the main surface to virtual memory addresses for CCS data. 
- * - * Because we can't change these maps around easily and because Vulkan - * allows two VkImages to be bound to overlapping memory regions (as long - * as the app is careful), it's not feasible to make this mapping part of - * the image. (On Gfx11 and earlier, the mapping was provided via - * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.) - * Instead, we attach the CCS data directly to the buffer object and setup - * the AUX table mapping at BO creation time. - * - * This field is for internal tracking use by the BO allocator only and - * should not be touched by other parts of the code. If something wants to - * know if a BO has implicit CCS data, it should instead look at the - * has_implicit_ccs boolean below. - * - * This data is not included in maps of this buffer. - */ - uint32_t _ccs_size; - /** Flags to pass to the kernel through drm_i915_exec_object2::flags */ uint32_t flags; @@ -521,9 +496,6 @@ struct anv_bo { /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */ bool has_client_visible_address:1; - - /** True if this BO has implicit CCS data attached to it */ - bool has_implicit_ccs:1; }; static inline struct anv_bo * @@ -985,13 +957,6 @@ struct anv_physical_device { */ bool has_reg_timestamp; - /** True if this device has implicit AUX - * - * If true, CCS is handled as an implicit attachment to the BO rather than - * as an explicitly bound surface. 
- */ - bool has_implicit_ccs; - bool always_flush_cache; struct { @@ -1180,8 +1145,6 @@ struct anv_device { int perf_fd; /* -1 if no opened */ uint64_t perf_metric; /* 0 if unset */ - struct intel_aux_map_context *aux_map_ctx; - const struct intel_l3_config *l3_config; struct intel_debug_block_frame *debug_frame_desc; @@ -1289,9 +1252,6 @@ enum anv_bo_alloc_flags { /** Has an address which is visible to the client */ ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8), - - /** This buffer has implicit CCS data attached to it */ - ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9), }; VkResult anv_device_alloc_bo(struct anv_device *device, @@ -2578,7 +2538,6 @@ struct anv_cmd_state { /* PIPELINE_SELECT.PipelineSelection */ uint32_t current_pipeline; const struct intel_l3_config * current_l3_config; - uint32_t last_aux_map_state; struct anv_cmd_graphics_state gfx; struct anv_cmd_compute_state compute; @@ -3204,12 +3163,6 @@ anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format, return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format; } -bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo, - VkImageCreateFlags create_flags, - VkFormat vk_format, VkImageTiling vk_tiling, - VkImageUsageFlags vk_usage, - const VkImageFormatListCreateInfo *fmt_list); - extern VkFormat vk_format_from_android(unsigned android_format, unsigned android_usage); @@ -3503,38 +3456,6 @@ anv_image_get_fast_clear_type_addr(const struct anv_device *device, return anv_address_add(addr, clear_color_state_size); } -static inline struct anv_address -anv_image_get_compression_state_addr(const struct anv_device *device, - const struct anv_image *image, - VkImageAspectFlagBits aspect, - uint32_t level, uint32_t array_layer) -{ - assert(level < anv_image_aux_levels(image, aspect)); - assert(array_layer < anv_image_aux_layers(image, aspect, level)); - UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect); - assert(image->planes[plane].aux_usage == 
ISL_AUX_USAGE_CCS_E); - - /* Relative to start of the plane's fast clear memory range */ - uint32_t offset; - - offset = 4; /* Go past the fast clear type */ - - if (image->vk.image_type == VK_IMAGE_TYPE_3D) { - for (uint32_t l = 0; l < level; l++) - offset += anv_minify(image->vk.extent.depth, l) * 4; - } else { - offset += level * image->vk.array_layers * 4; - } - - offset += array_layer * 4; - - assert(offset < image->planes[plane].fast_clear_memory_range.size); - - return anv_address_add( - anv_image_get_fast_clear_type_addr(device, image, aspect), - offset); -} - /* Returns true if a HiZ-enabled depth buffer can be sampled from. */ static inline bool anv_can_sample_with_hiz(const struct intel_device_info * const devinfo, @@ -3590,15 +3511,6 @@ anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo, return true; } -static inline bool -anv_image_plane_uses_aux_map(const struct anv_device *device, - const struct anv_image *image, - uint32_t plane) -{ - return device->info->has_aux_map && - isl_aux_usage_has_ccs(image->planes[plane].aux_usage); -} - void anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c index 2288934..1c2d495 100644 --- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c +++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c @@ -31,7 +31,6 @@ #include "vk_util.h" #include "util/fast_idiv_by_const.h" -#include "common/intel_aux_map.h" #include "common/intel_l3_config.h" #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" @@ -564,31 +563,6 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer, #define MI_PREDICATE_RESULT 0x2418 static void -set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image *image, - VkImageAspectFlagBits aspect, - uint32_t level, - uint32_t base_layer, uint32_t layer_count, - bool compressed) -{ - const uint32_t plane = 
anv_image_aspect_to_plane(image, aspect); - - /* We only have compression tracking for CCS_E */ - if (image->planes[plane].aux_usage != ISL_AUX_USAGE_CCS_E) - return; - - for (uint32_t a = 0; a < layer_count; a++) { - uint32_t layer = base_layer + a; - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { - sdi.Address = anv_image_get_compression_state_addr(cmd_buffer->device, - image, aspect, - level, layer); - sdi.ImmediateData = compressed ? UINT32_MAX : 0; - } - } -} - -static void set_image_fast_clear_state(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, VkImageAspectFlagBits aspect, @@ -599,12 +573,6 @@ set_image_fast_clear_state(struct anv_cmd_buffer *cmd_buffer, image, aspect); sdi.ImmediateData = fast_clear; } - - /* Whenever we have fast-clear, we consider that slice to be compressed. - * This makes building predicates much easier. - */ - if (fast_clear != ANV_FAST_CLEAR_NONE) - set_image_compressed_bit(cmd_buffer, image, aspect, 0, 0, 1, true); } /* This is only really practical on haswell and above because it requires @@ -626,34 +594,8 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer, mi_mem32(anv_image_get_fast_clear_type_addr(cmd_buffer->device, image, aspect)); - if (resolve_op == ISL_AUX_OP_FULL_RESOLVE) { - /* In this case, we're doing a full resolve which means we want the - * resolve to happen if any compression (including fast-clears) is - * present. - * - * In order to simplify the logic a bit, we make the assumption that, - * if the first slice has been fast-cleared, it is also marked as - * compressed. See also set_image_fast_clear_state. 
- */ - const struct mi_value compression_state = - mi_mem32(anv_image_get_compression_state_addr(cmd_buffer->device, - image, aspect, - level, array_layer)); - mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), compression_state); - mi_store(&b, compression_state, mi_imm(0)); - - if (level == 0 && array_layer == 0) { - /* If the predicate is true, we want to write 0 to the fast clear type - * and, if it's false, leave it alone. We can do this by writing - * - * clear_type = clear_type & ~predicate; - */ - struct mi_value new_fast_clear_type = - mi_iand(&b, fast_clear_type, - mi_inot(&b, mi_reg64(MI_PREDICATE_SRC0))); - mi_store(&b, fast_clear_type, new_fast_clear_type); - } - } else if (level == 0 && array_layer == 0) { + assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE); + if (level == 0 && array_layer == 0) { /* In this case, we are doing a partial resolve to get rid of fast-clear * colors. We don't care about the compression state but we do care * about how much fast clear is allowed by the final layout. @@ -797,18 +739,6 @@ genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer, { /* The aspect must be exactly one of the image aspects. */ assert(util_bitcount(aspect) == 1 && (aspect & image->vk.aspects)); - - /* The only compression types with more than just fast-clears are MCS, - * CCS_E, and HiZ. With HiZ we just trust the layout and don't actually - * track the current fast-clear and compression state. This leaves us - * with just MCS and CCS_E. 
- */ - if (aux_usage != ISL_AUX_USAGE_CCS_E && - aux_usage != ISL_AUX_USAGE_MCS) - return; - - set_image_compressed_bit(cmd_buffer, image, aspect, - level, base_layer, layer_count, true); } static void @@ -1076,8 +1006,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, } } - assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map)); - if (must_init_fast_clear_state) { if (base_level == 0 && base_layer == 0) init_fast_clear_color(cmd_buffer, image, aspect); @@ -1145,12 +1073,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, ISL_SWIZZLE_IDENTITY, aspect, level, base_layer, level_layer_count, ISL_AUX_OP_AMBIGUATE, NULL, false); - - if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { - set_image_compressed_bit(cmd_buffer, image, aspect, - level, base_layer, level_layer_count, - false); - } } } else { if (image->vk.samples == 4 || image->vk.samples == 16) { @@ -1220,10 +1142,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, if (final_fast_clear < initial_fast_clear) resolve_op = ISL_AUX_OP_PARTIAL_RESOLVE; - if (initial_aux_usage == ISL_AUX_USAGE_CCS_E && - final_aux_usage != ISL_AUX_USAGE_CCS_E) - resolve_op = ISL_AUX_OP_FULL_RESOLVE; - if (resolve_op == ISL_AUX_OP_NONE) return; @@ -1412,16 +1330,6 @@ genX(BeginCommandBuffer)( ANV_PIPE_VF_CACHE_INVALIDATE_BIT, "new cmd buffer"); - /* Re-emit the aux table register in every command buffer. This way we're - * ensured that we have the table even if this command buffer doesn't - * initialize any images. - */ - if (cmd_buffer->device->info->has_aux_map) { - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_AUX_TABLE_INVALIDATE_BIT, - "new cmd buffer with aux-tt"); - } - /* We send an "Indirect State Pointers Disable" packet at * EndCommandBuffer, so all push constant packets are ignored during a * context restore. Documentation says after that command, we need to -- 2.7.4