2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include "ac_drm_fourcc.h"
29 #include "util/u_debug.h"
30 #include "util/u_atomic.h"
31 #include "vulkan/util/vk_format.h"
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_radeon_winsys.h"
36 #include "vk_format.h"
37 #include "vk_render_pass.h"
40 #include "gfx10_format_table.h"
43 radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
46 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
47 assert(pCreateInfo->samples <= 1);
48 return RADEON_SURF_MODE_LINEAR_ALIGNED;
51 /* MSAA resources must be 2D tiled. */
52 if (pCreateInfo->samples > 1)
53 return RADEON_SURF_MODE_2D;
55 if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
56 device->physical_device->rad_info.gfx_level <= GFX8) {
57 /* this causes hangs in some VK CTS tests on GFX9. */
58 /* Textures with a very small height are recommended to be linear. */
59 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
60 /* Only very thin and long 2D textures should benefit from
62 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
63 return RADEON_SURF_MODE_LINEAR_ALIGNED;
66 return RADEON_SURF_MODE_2D;
70 radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
73 /* TC-compat HTILE is only available for GFX8+. */
74 if (device->physical_device->rad_info.gfx_level < GFX8)
77 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
80 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
83 /* Do not enable TC-compatible HTILE if the image isn't readable by a
84 * shader because no texture fetches will happen.
86 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
87 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
90 if (device->physical_device->rad_info.gfx_level < GFX9) {
91 /* TC-compat HTILE for MSAA depth/stencil images is broken
92 * on GFX8 because the tiling doesn't match.
94 if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
97 /* GFX9+ supports compression for both 32-bit and 16-bit depth
98 * surfaces, while GFX8 only supports 32-bit natively. Though,
99 * the driver allows TC-compat HTILE for 16-bit depth surfaces
100 * with no Z planes compression.
102 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
103 format != VK_FORMAT_D16_UNORM)
111 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
113 if (info->bo_metadata) {
114 if (device->physical_device->rad_info.gfx_level >= GFX9)
115 return info->bo_metadata->u.gfx9.scanout;
117 return info->bo_metadata->u.legacy.scanout;
120 return info->scanout;
124 radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
125 const struct radv_image *image)
127 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
130 if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
131 /* Do not enable CMASK or DCC for small surfaces where the cost
132 * of the eliminate pass can be higher than the benefit of fast
133 * clear. RadeonSI does this, but the image threshold is
139 return !!(image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
143 radv_image_use_fast_clear_for_image(const struct radv_device *device,
144 const struct radv_image *image)
146 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
149 return radv_image_use_fast_clear_for_image_early(device, image) &&
151 /* Enable DCC for concurrent images if stores are
152 * supported because that means we can keep DCC compressed on
153 * all layouts/queues.
155 radv_image_use_dcc_image_stores(device, image));
159 radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
160 VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
164 if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
167 if (sign_reinterpret != NULL)
168 *sign_reinterpret = false;
170 if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
171 const struct VkImageFormatListCreateInfo *format_list =
172 (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
173 pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
175 /* We have to ignore the existence of the list if viewFormatCount = 0 */
176 if (format_list && format_list->viewFormatCount) {
177 /* compatibility is transitive, so we only need to check
178 * one format with everything else. */
179 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
180 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
183 if (!radv_dcc_formats_compatible(pdev->rad_info.gfx_level, format,
184 format_list->pViewFormats[i], sign_reinterpret))
196 radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
198 if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics)
201 return radv_is_atomic_format_supported(format);
205 radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
206 VkImageCreateFlags flags)
208 if (radv_format_is_atomic_allowed(device, format))
211 if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
212 const struct VkImageFormatListCreateInfo *format_list =
213 (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
214 pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
216 /* We have to ignore the existence of the list if viewFormatCount = 0 */
217 if (format_list && format_list->viewFormatCount) {
218 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
219 if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
229 radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
230 const VkImageCreateInfo *pCreateInfo, VkFormat format,
231 bool *sign_reinterpret)
233 /* DCC (Delta Color Compression) is only available for GFX8+. */
234 if (device->physical_device->rad_info.gfx_level < GFX8)
237 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
240 if (image->shareable && image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
244 * TODO: Enable DCC for storage images on GFX9 and earlier.
246 * Also disable DCC with atomics because even when DCC stores are
247 * supported atomics will always decompress. So if we are
248 * decompressing a lot anyway we might as well not have DCC.
250 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
251 (device->physical_device->rad_info.gfx_level < GFX10 ||
252 radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
255 /* Do not enable DCC for fragment shading rate attachments. */
256 if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
259 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
262 if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
265 if (!radv_image_use_fast_clear_for_image_early(device, image) &&
266 image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
269 /* Do not enable DCC for mipmapped arrays because performance is worse. */
270 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
273 if (device->physical_device->rad_info.gfx_level < GFX10) {
274 /* TODO: Add support for DCC MSAA on GFX8-9. */
275 if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
278 /* TODO: Add support for DCC layers/mipmaps on GFX9. */
279 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
280 device->physical_device->rad_info.gfx_level == GFX9)
284 /* FIXME: Figure out how to use DCC for MSAA images without FMASK. */
285 if (pCreateInfo->samples > 1 && !device->physical_device->use_fmask)
288 return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
289 pCreateInfo->flags, sign_reinterpret);
293 radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
295 if (!radv_image_has_dcc(image))
298 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
301 if (!radv_image_use_fast_clear_for_image(device, image))
304 /* TODO: Fix storage images with DCC without DCC image stores.
305 * Disabling it for now. */
306 if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
307 !radv_image_use_dcc_image_stores(device, image))
314 * Whether to enable image stores with DCC compression for this image. If
315 * this function returns false the image subresource should be decompressed
316 * before using it with image stores.
318 * Note that this can have mixed performance implications, see
319 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
321 * This function assumes the image uses DCC compression.
324 radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
326 return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level,
327 &image->planes[0].surface);
331 * Whether to use a predicate to determine whether DCC is in a compressed
332 * state. This can be used to avoid decompressing an image multiple times.
335 radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
337 return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
341 radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
343 return device->physical_device->use_fmask && image->info.samples > 1 &&
344 ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
345 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
349 radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
352 * - Investigate about mips+layers.
353 * - Enable on other gens.
355 bool use_htile_for_mips =
356 image->info.array_size == 1 && device->physical_device->rad_info.gfx_level >= GFX10;
358 /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
359 if (device->physical_device->rad_info.gfx_level == GFX10 &&
360 image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
363 /* Do not enable HTILE for very small images because it seems less performant but make sure it's
364 * allowed with VRS attachments because we need HTILE.
366 if (image->info.width * image->info.height < 8 * 8 &&
367 !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
368 !device->attachment_vrs_enabled)
371 return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
375 radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
377 /* TC-compat CMASK is only available for GFX8+. */
378 if (device->physical_device->rad_info.gfx_level < GFX8)
381 if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
384 /* TC-compat CMASK with storage images is supported on GFX10+. */
385 if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
386 device->physical_device->rad_info.gfx_level < GFX10)
389 /* Do not enable TC-compatible if the image isn't readable by a shader
390 * because no texture fetches will happen.
392 if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
393 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
396 /* If the image doesn't have FMASK, it can't be fetchable. */
397 if (!radv_image_has_fmask(image))
404 si_get_bo_metadata_word1(const struct radv_device *device)
406 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
410 radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
412 if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
415 if (md->size_metadata < 40)
422 radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
423 const struct radeon_bo_metadata *md)
425 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
427 if (device->physical_device->rad_info.gfx_level >= GFX9) {
428 if (md->u.gfx9.swizzle_mode > 0)
429 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
431 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
433 surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
435 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
436 surface->u.legacy.bankw = md->u.legacy.bankw;
437 surface->u.legacy.bankh = md->u.legacy.bankh;
438 surface->u.legacy.tile_split = md->u.legacy.tile_split;
439 surface->u.legacy.mtilea = md->u.legacy.mtilea;
440 surface->u.legacy.num_banks = md->u.legacy.num_banks;
442 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
443 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
444 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
445 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
447 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
452 radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
453 const struct radv_image_create_info *create_info,
454 struct ac_surf_info *image_info)
456 unsigned width = image->info.width;
457 unsigned height = image->info.height;
460 * minigbm sometimes allocates bigger images which is going to result in
461 * weird strides and other properties. Lets be lenient where possible and
462 * fail it on GFX10 (as we cannot cope there).
464 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
466 if (create_info->bo_metadata &&
467 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
468 const struct radeon_bo_metadata *md = create_info->bo_metadata;
470 if (device->physical_device->rad_info.gfx_level >= GFX10) {
471 width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
472 height = G_00A008_HEIGHT(md->metadata[4]) + 1;
474 width = G_008F18_WIDTH(md->metadata[4]) + 1;
475 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
479 if (image->info.width == width && image->info.height == height)
482 if (width < image->info.width || height < image->info.height) {
484 "The imported image has smaller dimensions than the internal\n"
485 "dimensions. Using it is going to fail badly, so we reject\n"
487 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
488 image->info.width, image->info.height, width, height);
489 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
490 } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
492 "Tried to import an image with inconsistent width on GFX10.\n"
493 "As GFX10 has no separate stride fields we cannot cope with\n"
494 "an inconsistency in width and will fail this import.\n"
495 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
496 image->info.width, image->info.height, width, height);
497 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
500 "Tried to import an image with inconsistent width on pre-GFX10.\n"
501 "As GFX10 has no separate stride fields we cannot cope with\n"
502 "an inconsistency and would fail on GFX10.\n"
503 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
504 image->info.width, image->info.height, width, height);
506 image_info->width = width;
507 image_info->height = height;
513 radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
514 const struct radv_image_create_info *create_info,
515 struct ac_surf_info *image_info)
517 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
518 if (result != VK_SUCCESS)
521 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
522 if (create_info->bo_metadata) {
523 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
524 create_info->bo_metadata);
527 if (radv_surface_has_scanout(device, create_info)) {
528 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
529 if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
530 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
532 image->info.surf_index = NULL;
535 if (create_info->prime_blit_src && device->physical_device->rad_info.gfx_level == GFX9) {
536 /* Older SDMA hw can't handle DCC */
537 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
544 etc2_emulation_format(VkFormat format)
547 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
548 case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
549 case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
550 return VK_FORMAT_R8G8B8A8_UNORM;
551 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
552 case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
553 case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
554 return VK_FORMAT_R8G8B8A8_SRGB;
555 case VK_FORMAT_EAC_R11_UNORM_BLOCK:
556 return VK_FORMAT_R16_UNORM;
557 case VK_FORMAT_EAC_R11_SNORM_BLOCK:
558 return VK_FORMAT_R16_SNORM;
559 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
560 return VK_FORMAT_R16G16_UNORM;
561 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
562 return VK_FORMAT_R16G16_SNORM;
564 unreachable("Unhandled ETC format");
569 radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image,
572 if (pdev->emulate_etc2 &&
573 vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
575 return image->vk.format;
576 return etc2_emulation_format(image->vk.format);
578 return vk_format_get_plane_format(image->vk.format, plane);
582 radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
583 const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
586 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
587 VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id);
588 const struct util_format_description *desc = vk_format_description(format);
589 bool is_depth, is_stencil;
591 is_depth = util_format_has_depth(desc);
592 is_stencil = util_format_has_stencil(desc);
594 flags = RADEON_SURF_SET(array_mode, MODE);
596 switch (pCreateInfo->imageType) {
597 case VK_IMAGE_TYPE_1D:
598 if (pCreateInfo->arrayLayers > 1)
599 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
601 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
603 case VK_IMAGE_TYPE_2D:
604 if (pCreateInfo->arrayLayers > 1)
605 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
607 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
609 case VK_IMAGE_TYPE_3D:
610 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
613 unreachable("unhandled image type");
616 /* Required for clearing/initializing a specific layer on GFX8. */
617 flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
620 flags |= RADEON_SURF_ZBUFFER;
622 if (radv_use_htile_for_image(device, image) &&
623 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
624 if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
625 flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
627 flags |= RADEON_SURF_NO_HTILE;
632 flags |= RADEON_SURF_SBUFFER;
634 if (device->physical_device->rad_info.gfx_level >= GFX9 &&
635 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
636 vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
637 flags |= RADEON_SURF_NO_RENDER_TARGET;
639 if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
640 &image->dcc_sign_reinterpret))
641 flags |= RADEON_SURF_DISABLE_DCC;
643 if (!radv_use_fmask_for_image(device, image))
644 flags |= RADEON_SURF_NO_FMASK;
646 if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
648 RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
654 static inline unsigned
655 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
658 return plane->surface.u.legacy.zs.stencil_tiling_index[level];
660 return plane->surface.u.legacy.tiling_index[level];
664 radv_map_swizzle(unsigned swizzle)
668 return V_008F0C_SQ_SEL_Y;
670 return V_008F0C_SQ_SEL_Z;
672 return V_008F0C_SQ_SEL_W;
674 return V_008F0C_SQ_SEL_0;
676 return V_008F0C_SQ_SEL_1;
677 default: /* PIPE_SWIZZLE_X */
678 return V_008F0C_SQ_SEL_X;
683 radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
684 enum pipe_swizzle swizzle[4])
686 if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
687 /* 64-bit formats only support storage images and storage images
688 * require identity component mappings. We use 32-bit
689 * instructions to access 64-bit images, so we need a special
692 * The zw components are 1,0 so that they can be easily be used
693 * by loads to create the w component, which has to be 0 for
696 swizzle[0] = PIPE_SWIZZLE_X;
697 swizzle[1] = PIPE_SWIZZLE_Y;
698 swizzle[2] = PIPE_SWIZZLE_1;
699 swizzle[3] = PIPE_SWIZZLE_0;
700 } else if (!mapping) {
701 for (unsigned i = 0; i < 4; i++)
702 swizzle[i] = desc->swizzle[i];
703 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
704 const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
706 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
708 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
713 radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format,
714 unsigned offset, unsigned range, uint32_t *state)
716 const struct util_format_description *desc;
718 unsigned num_format, data_format;
720 enum pipe_swizzle swizzle[4];
721 desc = vk_format_description(vk_format);
722 first_non_void = vk_format_get_first_non_void_channel(vk_format);
723 stride = desc->block.bits / 8;
725 radv_compose_swizzle(desc, NULL, swizzle);
729 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
731 if (device->physical_device->rad_info.gfx_level != GFX8 && stride) {
736 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
737 S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
738 S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
739 S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
741 if (device->physical_device->rad_info.gfx_level >= GFX10) {
742 const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)];
744 /* OOB_SELECT chooses the out-of-bounds check:
745 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
746 * - 1: index >= NUM_RECORDS
747 * - 2: NUM_RECORDS == 0
748 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
749 * else: swizzle_address >= NUM_RECORDS
751 state[3] |= S_008F0C_FORMAT(fmt->img_format) |
752 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
753 S_008F0C_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11);
755 num_format = radv_translate_buffer_numformat(desc, first_non_void);
756 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
758 assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
759 assert(num_format != ~0);
761 state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
766 si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
767 const struct legacy_surf_level *base_level_info, unsigned plane_id,
768 unsigned base_level, unsigned first_level, unsigned block_width,
769 bool is_stencil, bool is_storage_image, bool disable_compression,
770 bool enable_write_compression, uint32_t *state,
771 const struct ac_surf_nbc_view *nbc_view)
773 struct radv_image_plane *plane = &image->planes[plane_id];
774 struct radv_image_binding *binding = image->disjoint ? &image->bindings[plane_id] : &image->bindings[0];
775 uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0;
776 uint64_t va = gpu_address;
777 uint8_t swizzle = plane->surface.tile_swizzle;
778 enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
779 uint64_t meta_va = 0;
780 if (gfx_level >= GFX9) {
782 va += plane->surface.u.gfx9.zs.stencil_offset;
784 va += plane->surface.u.gfx9.surf_offset;
785 if (nbc_view && nbc_view->valid) {
786 va += nbc_view->base_address_offset;
787 swizzle = nbc_view->tile_swizzle;
790 va += (uint64_t)base_level_info->offset_256B * 256;
793 if (gfx_level >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
795 state[1] &= C_008F14_BASE_ADDRESS_HI;
796 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
798 if (gfx_level >= GFX8) {
799 state[6] &= C_008F28_COMPRESSION_EN;
801 if (!disable_compression && radv_dcc_enabled(image, first_level)) {
802 meta_va = gpu_address + plane->surface.meta_offset;
803 if (gfx_level <= GFX8)
804 meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;
806 unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
807 dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
808 meta_va |= dcc_tile_swizzle;
809 } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
810 meta_va = gpu_address + plane->surface.meta_offset;
814 state[6] |= S_008F28_COMPRESSION_EN(1);
815 if (gfx_level <= GFX9)
816 state[7] = meta_va >> 8;
820 if (gfx_level >= GFX10) {
821 state[3] &= C_00A00C_SW_MODE;
824 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
826 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
829 state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;
832 struct gfx9_surf_meta_flags meta = {
837 if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
838 meta = plane->surface.u.gfx9.color.dcc;
840 if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
841 state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);
843 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
844 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
847 state[7] = meta_va >> 16;
848 } else if (gfx_level == GFX9) {
849 state[3] &= C_008F1C_SW_MODE;
850 state[4] &= C_008F20_PITCH;
853 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
854 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
856 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
857 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
861 C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
863 struct gfx9_surf_meta_flags meta = {
868 if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
869 meta = plane->surface.u.gfx9.color.dcc;
871 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
872 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
873 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
877 unsigned pitch = base_level_info->nblk_x * block_width;
878 unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
880 state[3] &= C_008F1C_TILING_INDEX;
881 state[3] |= S_008F1C_TILING_INDEX(index);
882 state[4] &= C_008F20_PITCH;
883 state[4] |= S_008F20_PITCH(pitch - 1);
888 radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
889 unsigned nr_samples, bool is_storage_image, bool gfx9)
891 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
892 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
894 /* GFX9 allocates 1D textures as 2D. */
895 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
896 image_type = VK_IMAGE_TYPE_2D;
897 switch (image_type) {
898 case VK_IMAGE_TYPE_1D:
899 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
900 case VK_IMAGE_TYPE_2D:
902 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
904 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
905 case VK_IMAGE_TYPE_3D:
906 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
907 return V_008F1C_SQ_RSRC_IMG_3D;
909 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
911 unreachable("illegal image type");
916 gfx9_border_color_swizzle(const struct util_format_description *desc)
918 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
920 if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
921 /* For the pre-defined border color values (white, opaque
922 * black, transparent black), the only thing that matters is
923 * that the alpha channel winds up in the correct place
924 * (because the RGB channels are all the same) so either of
925 * these enumerations will work.
927 if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
928 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
930 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
931 } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
932 if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
933 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
935 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
936 } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
937 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
938 } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
939 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
946 vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
948 const struct util_format_description *desc = vk_format_description(format);
950 if (device->physical_device->rad_info.gfx_level >= GFX10 && desc->nr_channels == 1)
951 return desc->swizzle[3] == PIPE_SWIZZLE_X;
953 return radv_translate_colorswap(format, false) <= 1;
956 * Build the sampler view descriptor for a texture (GFX10).
959 gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
960 bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
961 const VkComponentMapping *mapping, unsigned first_level,
962 unsigned last_level, unsigned first_layer, unsigned last_layer,
963 unsigned width, unsigned height, unsigned depth, float min_lod,
964 uint32_t *state, uint32_t *fmask_state,
965 VkImageCreateFlags img_create_flags,
966 const struct ac_surf_nbc_view *nbc_view)
968 const struct util_format_description *desc;
969 enum pipe_swizzle swizzle[4];
973 desc = vk_format_description(vk_format);
975 /* For emulated ETC2 without alpha we need to override the format to a 3-componenent format, so
976 * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
977 * this uses the Gallium formats to set the description. */
978 if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
979 vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
980 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
981 } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
982 vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
983 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
986 img_format = ac_get_gfx10_format_table(&device->physical_device->rad_info)[vk_format_to_pipe_format(vk_format)].img_format;
988 radv_compose_swizzle(desc, mapping, swizzle);
990 if (img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
991 assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
992 type = V_008F1C_SQ_RSRC_IMG_3D;
994 type = radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, image->info.samples,
995 is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
998 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
1000 depth = image->info.array_size;
1001 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
1002 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
1003 depth = image->info.array_size;
1004 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
1005 depth = image->info.array_size / 6;
1008 state[1] = S_00A004_FORMAT(img_format) |
1009 S_00A004_WIDTH_LO(width - 1);
1010 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
1011 S_00A008_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level < GFX11);
1012 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
1013 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
1014 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
1015 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
1016 S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
1017 S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
1019 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
1020 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
1021 * to know the total number of layers.
1023 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
1024 S_00A010_BASE_ARRAY(first_layer);
1025 state[5] = S_00A014_ARRAY_PITCH(0) |
1026 S_00A014_PERF_MOD(4);
1030 if (img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
1031 assert(type == V_008F1C_SQ_RSRC_IMG_3D);
1033 /* ARRAY_PITCH is only meaningful for 3D images, 0 means SRV, 1 means UAV.
1034 * In SRV mode, BASE_ARRAY is ignored and DEPTH is the last slice of mipmap level 0.
1035 * In UAV mode, BASE_ARRAY is the first slice and DEPTH is the last slice of the bound level.
1037 state[4] &= C_00A010_DEPTH;
1038 state[4] |= S_00A010_DEPTH(!is_storage_image ? depth - 1 : u_minify(depth, first_level) - 1);
1039 state[5] |= S_00A014_ARRAY_PITCH(is_storage_image);
1043 image->info.samples > 1 ? util_logbase2(image->info.samples) : image->info.levels - 1;
1044 if (nbc_view && nbc_view->valid)
1045 max_mip = nbc_view->max_mip;
1047 unsigned min_lod_clamped = radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8);
1048 if (device->physical_device->rad_info.gfx_level >= GFX11) {
1049 state[1] |= S_00A004_MAX_MIP(max_mip);
1050 state[5] |= S_00A014_MIN_LOD_LO(min_lod_clamped);
1051 state[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5);
1053 state[1] |= S_00A004_MIN_LOD(min_lod_clamped);
1054 state[5] |= S_00A014_MAX_MIP(max_mip);
1057 if (radv_dcc_enabled(image, first_level)) {
1058 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
1059 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
1060 image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
1061 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1064 if (radv_image_get_iterate256(device, image)) {
1065 state[6] |= S_00A018_ITERATE_256(1);
1068 /* Initialize the sampler view for FMASK. */
1070 if (radv_image_has_fmask(image)) {
1071 uint64_t gpu_address = radv_buffer_get_va(image->bindings[0].bo);
1075 assert(image->plane_count == 1);
1077 va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset;
1079 switch (image->info.samples) {
1081 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
1084 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
1087 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
1090 unreachable("invalid nr_samples");
1093 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
1094 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
1095 S_00A004_WIDTH_LO(width - 1);
1096 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
1097 S_00A008_RESOURCE_LEVEL(1);
1099 S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1100 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1101 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
1103 radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, 0, false, false));
1104 fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
1106 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
1109 if (radv_image_is_tc_compat_cmask(image)) {
1110 va = gpu_address + image->bindings[0].offset + image->planes[0].surface.cmask_offset;
1112 fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
1113 fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
1114 fmask_state[7] |= va >> 16;
1117 memset(fmask_state, 0, 8 * 4);
1122 * Build the sampler view descriptor for a texture (SI-GFX9)
1125 si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
1126 bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
1127 const VkComponentMapping *mapping, unsigned first_level,
1128 unsigned last_level, unsigned first_layer, unsigned last_layer,
1129 unsigned width, unsigned height, unsigned depth, float min_lod,
1130 uint32_t *state, uint32_t *fmask_state,
1131 VkImageCreateFlags img_create_flags)
1133 const struct util_format_description *desc;
1134 enum pipe_swizzle swizzle[4];
1136 unsigned num_format, data_format, type;
1138 desc = vk_format_description(vk_format);
1140 /* For emulated ETC2 without alpha we need to override the format to a 3-componenent format, so
1141 * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
1142 * this uses the Gallium formats to set the description. */
1143 if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
1144 vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
1145 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
1146 } else if (image->vk.format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
1147 vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
1148 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
1151 radv_compose_swizzle(desc, mapping, swizzle);
1153 first_non_void = vk_format_get_first_non_void_channel(vk_format);
1155 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
1156 if (num_format == ~0) {
1160 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
1161 if (data_format == ~0) {
1165 /* S8 with either Z16 or Z32 HTILE need a special format. */
1166 if (device->physical_device->rad_info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
1167 radv_image_is_tc_compat_htile(image)) {
1168 if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
1169 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
1170 else if (image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT)
1171 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
1174 if (device->physical_device->rad_info.gfx_level == GFX9 &&
1175 img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
1176 assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
1177 type = V_008F1C_SQ_RSRC_IMG_3D;
1179 type = radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, image->info.samples,
1180 is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
1183 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
1185 depth = image->info.array_size;
1186 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
1187 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
1188 depth = image->info.array_size;
1189 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
1190 depth = image->info.array_size / 6;
1193 state[1] = (S_008F14_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) |
1194 S_008F14_DATA_FORMAT(data_format) |
1195 S_008F14_NUM_FORMAT(num_format));
1196 state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
1197 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
1198 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
1199 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
1200 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
1201 S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
1202 S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
1204 S_008F1C_TYPE(type));
1206 state[5] = S_008F24_BASE_ARRAY(first_layer);
1210 if (device->physical_device->rad_info.gfx_level == GFX9) {
1211 unsigned bc_swizzle = gfx9_border_color_swizzle(desc);
1213 /* Depth is the last accessible layer on Gfx9.
1214 * The hw doesn't need to know the total number of layers.
1216 if (type == V_008F1C_SQ_RSRC_IMG_3D)
1217 state[4] |= S_008F20_DEPTH(depth - 1);
1219 state[4] |= S_008F20_DEPTH(last_layer);
1221 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
1222 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
1223 : image->info.levels - 1);
1225 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1226 state[4] |= S_008F20_DEPTH(depth - 1);
1227 state[5] |= S_008F24_LAST_ARRAY(last_layer);
1229 if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
1230 image->planes[0].surface.meta_offset) {
1231 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1233 if (device->instance->disable_aniso_single_level) {
1234 /* The last dword is unused by hw. The shader uses it to clear
1235 * bits in the first dword of sampler state.
1237 if (device->physical_device->rad_info.gfx_level <= GFX7 && image->info.samples <= 1) {
1238 if (first_level == last_level)
1239 state[7] = C_008F30_MAX_ANISO_RATIO;
1241 state[7] = 0xffffffff;
1246 /* Initialize the sampler view for FMASK. */
1248 if (radv_image_has_fmask(image)) {
1249 uint32_t fmask_format;
1250 uint64_t gpu_address = radv_buffer_get_va(image->bindings[0].bo);
1253 assert(image->plane_count == 1);
1255 va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset;
1257 if (device->physical_device->rad_info.gfx_level == GFX9) {
1258 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1259 switch (image->info.samples) {
1261 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
1264 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
1267 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
1270 unreachable("invalid nr_samples");
1273 switch (image->info.samples) {
1275 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1278 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1281 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1285 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1287 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1290 fmask_state[0] = va >> 8;
1291 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1292 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
1293 S_008F14_NUM_FORMAT(num_format);
1294 fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
1296 S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1297 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1299 radv_tex_dim(image->vk.image_type, view_type, image->info.array_size, 0, false, false));
1301 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1305 if (device->physical_device->rad_info.gfx_level == GFX9) {
1306 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
1307 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1308 S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
1309 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);
1311 if (radv_image_is_tc_compat_cmask(image)) {
1312 va = gpu_address + image->bindings[0].offset + image->planes[0].surface.cmask_offset;
1314 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1315 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1316 fmask_state[7] |= va >> 8;
1320 S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
1322 S_008F20_DEPTH(depth - 1) |
1323 S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
1324 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1326 if (radv_image_is_tc_compat_cmask(image)) {
1327 va = gpu_address + image->bindings[0].offset + image->planes[0].surface.cmask_offset;
1329 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1330 fmask_state[7] |= va >> 8;
1334 memset(fmask_state, 0, 8 * 4);
1339 radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
1340 bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
1341 const VkComponentMapping *mapping, unsigned first_level,
1342 unsigned last_level, unsigned first_layer, unsigned last_layer,
1343 unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
1344 uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
1345 const struct ac_surf_nbc_view *nbc_view)
1347 if (device->physical_device->rad_info.gfx_level >= GFX10) {
1348 gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
1349 first_level, last_level, first_layer, last_layer, width, height,
1350 depth, min_lod, state, fmask_state, img_create_flags, nbc_view);
1352 si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
1353 first_level, last_level, first_layer, last_layer, width, height,
1354 depth, min_lod, state, fmask_state, img_create_flags);
1359 radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
1360 struct radeon_bo_metadata *md)
1362 static const VkComponentMapping fixedmapping;
1365 assert(image->plane_count == 1);
1367 radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type,
1368 image->vk.format, &fixedmapping, 0, image->info.levels - 1, 0,
1369 image->info.array_size - 1, image->info.width, image->info.height,
1370 image->info.depth, 0.0f, desc, NULL, 0, NULL);
1372 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
1373 0, image->planes[0].surface.blk_w, false, false, false, false,
1376 ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
1377 image->info.levels, desc, &md->size_metadata, md->metadata);
1381 radv_init_metadata(struct radv_device *device, struct radv_image *image,
1382 struct radeon_bo_metadata *metadata)
1384 struct radeon_surf *surface = &image->planes[0].surface;
1386 memset(metadata, 0, sizeof(*metadata));
1388 if (device->physical_device->rad_info.gfx_level >= GFX9) {
1389 uint64_t dcc_offset =
1390 image->bindings[0].offset +
1391 (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
1392 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
1393 metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
1394 metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
1395 metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
1396 metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
1397 metadata->u.gfx9.dcc_max_compressed_block_size =
1398 surface->u.gfx9.color.dcc.max_compressed_block_size;
1399 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1401 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
1402 ? RADEON_LAYOUT_TILED
1403 : RADEON_LAYOUT_LINEAR;
1404 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
1405 ? RADEON_LAYOUT_TILED
1406 : RADEON_LAYOUT_LINEAR;
1407 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1408 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1409 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1410 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1411 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1412 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1413 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1414 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1416 radv_query_opaque_metadata(device, image, metadata);
1420 radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
1421 uint64_t offset, uint32_t stride)
1423 ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
1424 image->info.levels, offset, stride);
1428 radv_image_alloc_single_sample_cmask(const struct radv_device *device,
1429 const struct radv_image *image, struct radeon_surf *surf)
1431 if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
1432 image->info.depth > 1 || radv_image_has_dcc(image) ||
1433 !radv_image_use_fast_clear_for_image(device, image) ||
1434 (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
1437 assert(image->info.storage_samples == 1);
1439 surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
1440 surf->total_size = surf->cmask_offset + surf->cmask_size;
1441 surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
1445 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
1447 /* images with modifiers can be potentially imported */
1448 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
1451 if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
1452 image->fce_pred_offset = image->size;
1453 image->size += 8 * image->info.levels;
1456 if (radv_image_use_dcc_predication(device, image)) {
1457 image->dcc_pred_offset = image->size;
1458 image->size += 8 * image->info.levels;
1461 if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
1462 radv_image_has_cmask(image) || radv_image_has_htile(image)) {
1463 image->clear_value_offset = image->size;
1464 image->size += 8 * image->info.levels;
1467 if (radv_image_is_tc_compat_htile(image) &&
1468 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1469 /* Metadata for the TC-compatible HTILE hardware bug which
1470 * have to be fixed by updating ZRANGE_PRECISION when doing
1471 * fast depth clears to 0.0f.
1473 image->tc_compat_zrange_offset = image->size;
1474 image->size += image->info.levels * 4;
1478 /* Determine if the image is affected by the pipe misaligned metadata issue
1479 * which requires to invalidate L2.
1482 radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
1484 struct radeon_info *rad_info = &device->physical_device->rad_info;
1485 int log2_samples = util_logbase2(image->info.samples);
1487 assert(rad_info->gfx_level >= GFX10);
1489 for (unsigned i = 0; i < image->plane_count; ++i) {
1490 VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
1491 int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
1492 int log2_bpp_and_samples;
1494 if (rad_info->gfx_level >= GFX10_3) {
1495 log2_bpp_and_samples = log2_bpp + log2_samples;
1497 if (vk_format_has_depth(image->vk.format) && image->info.array_size >= 8) {
1501 log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
1504 int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
1505 int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
1507 if (vk_format_has_depth(image->vk.format)) {
1508 if (radv_image_is_tc_compat_htile(image) && overlap) {
1512 int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
1513 int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
1514 int samples_overlap = MIN2(log2_samples, overlap);
1516 /* TODO: It shouldn't be necessary if the image has DCC but
1517 * not readable by shader.
1519 if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
1520 (samples_overlap > log2_samples_frag_diff)) {
1530 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
1532 if (device->physical_device->rad_info.gfx_level >= GFX10) {
1533 return !device->physical_device->rad_info.tcc_rb_non_coherent &&
1534 !radv_image_is_pipe_misaligned(device, image);
1535 } else if (device->physical_device->rad_info.gfx_level == GFX9) {
1536 if (image->info.samples == 1 &&
1538 (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1539 !vk_format_has_stencil(image->vk.format)) {
1540 /* Single-sample color and single-sample depth
1541 * (not stencil) are coherent with shaders on
1552 * Determine if the given image can be fast cleared.
1555 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
1557 if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
1560 if (vk_format_is_color(image->vk.format)) {
1561 if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
1564 /* RB+ doesn't work with CMASK fast clear on Stoney. */
1565 if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
1568 if (!radv_image_has_htile(image))
1572 /* Do not fast clears 3D images. */
1573 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
1580 * Determine if the given image can be fast cleared using comp-to-single.
1583 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
1585 /* comp-to-single is only available for GFX10+. */
1586 if (device->physical_device->rad_info.gfx_level < GFX10)
1589 /* If the image can't be fast cleared, comp-to-single can't be used. */
1590 if (!radv_image_can_fast_clear(device, image))
1593 /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */
1594 if (!radv_image_has_dcc(image))
1597 /* It seems 8bpp and 16bpp require RB+ to work. */
1598 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
1599 if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
1606 radv_get_internal_plane_count(const struct radv_physical_device *pdev, VkFormat fmt)
1608 if (pdev->emulate_etc2 && vk_format_description(fmt)->layout == UTIL_FORMAT_LAYOUT_ETC)
1610 return vk_format_get_plane_count(fmt);
1614 radv_image_reset_layout(const struct radv_physical_device *pdev, struct radv_image *image)
1617 image->alignment = 1;
1619 image->tc_compatible_cmask = 0;
1620 image->fce_pred_offset = image->dcc_pred_offset = 0;
1621 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1623 unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
1624 for (unsigned i = 0; i < plane_count; ++i) {
1625 VkFormat format = radv_image_get_plane_format(pdev, image, i);
1626 if (vk_format_has_depth(format))
1627 format = vk_format_depth_only(format);
1629 uint64_t flags = image->planes[i].surface.flags;
1630 uint64_t modifier = image->planes[i].surface.modifier;
1631 memset(image->planes + i, 0, sizeof(image->planes[i]));
1633 image->planes[i].surface.flags = flags;
1634 image->planes[i].surface.modifier = modifier;
1635 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1636 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1637 image->planes[i].surface.bpe = vk_format_get_blocksize(format);
1639 /* align byte per element on dword */
1640 if (image->planes[i].surface.bpe == 3) {
1641 image->planes[i].surface.bpe = 4;
1647 radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
1648 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
1649 struct radv_image *image)
1651 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1652 * common internal case. */
1653 create_info.vk_info = NULL;
1655 struct ac_surf_info image_info = image->info;
1656 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1657 if (result != VK_SUCCESS)
1660 assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
1662 radv_image_reset_layout(device->physical_device, image);
1664 unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format);
1665 for (unsigned plane = 0; plane < plane_count; ++plane) {
1666 struct ac_surf_info info = image_info;
1670 info.width = vk_format_get_plane_width(image->vk.format, plane, info.width);
1671 info.height = vk_format_get_plane_height(image->vk.format, plane, info.height);
1673 if (create_info.no_metadata_planes || plane_count > 1) {
1674 image->planes[plane].surface.flags |=
1675 RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
1678 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1681 if (!radv_use_dcc_for_image_late(device, image))
1682 ac_surface_zero_dcc_fields(&image->planes[0].surface);
1685 if (create_info.bo_metadata && !mod_info &&
1686 !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
1687 &image->planes[plane].surface, image_info.storage_samples,
1688 image_info.levels, create_info.bo_metadata->size_metadata,
1689 create_info.bo_metadata->metadata))
1690 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1692 if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 &&
1694 radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
1697 if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
1698 !mod_info->pPlaneLayouts[plane].rowPitch)
1699 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1701 offset = mod_info->pPlaneLayouts[plane].offset;
1702 stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
1704 offset = image->disjoint ? 0 :
1705 align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
1706 stride = 0; /* 0 means no override */
1709 if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
1710 &image->planes[plane].surface, image->info.levels,
1712 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1714 /* Validate DCC offsets in modifier layout. */
1715 if (plane_count == 1 && mod_info) {
1716 unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
1717 if (mod_info->drmFormatModifierPlaneCount != mem_planes)
1718 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1720 for (unsigned i = 1; i < mem_planes; ++i) {
1721 if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
1722 &image->planes[plane].surface, i,
1723 0) != mod_info->pPlaneLayouts[i].offset)
1724 return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
1728 image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
1729 image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);
1731 image->planes[plane].format =
1732 radv_image_get_plane_format(device->physical_device, image, plane);
1735 image->tc_compatible_cmask =
1736 radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
1738 image->l2_coherent = radv_image_is_l2_coherent(device, image);
1740 image->support_comp_to_single = radv_image_use_comp_to_single(device, image);
1742 radv_image_alloc_values(device, image);
1744 assert(image->planes[0].surface.surf_size);
1745 assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
1746 ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
1751 radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1752 struct radv_image *image)
1754 if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo)
1755 device->ws->buffer_destroy(device->ws, image->bindings[0].bo);
1757 if (image->owned_memory != VK_NULL_HANDLE) {
1758 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1759 radv_free_memory(device, pAllocator, mem);
1762 vk_image_finish(&image->vk);
1763 vk_free2(&device->vk.alloc, pAllocator, image);
1767 radv_image_print_info(struct radv_device *device, struct radv_image *image)
1769 fprintf(stderr, "Image:\n");
1771 " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
1772 "width=%" PRIu32 ", height=%" PRIu32 ", "
1773 "array_size=%" PRIu32 ", levels=%" PRIu32 "\n",
1774 image->size, image->alignment, image->info.width, image->info.height,
1775 image->info.array_size, image->info.levels);
1776 for (unsigned i = 0; i < image->plane_count; ++i) {
1777 const struct radv_image_plane *plane = &image->planes[i];
1778 const struct radeon_surf *surf = &plane->surface;
1779 const struct util_format_description *desc = vk_format_description(plane->format);
1780 uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
1781 &plane->surface, 0, 0);
1783 fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
1785 ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
1790 radv_select_modifier(const struct radv_device *dev, VkFormat format,
1791 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
1793 const struct radv_physical_device *pdev = dev->physical_device;
1796 assert(mod_list->drmFormatModifierCount);
1798 /* We can allow everything here as it does not affect order and the application
1799 * is only allowed to specify modifiers that we support. */
1800 const struct ac_modifier_options modifier_options = {
1805 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1808 uint64_t *mods = calloc(mod_count, sizeof(*mods));
1810 /* If allocations fail, fall back to a dumber solution. */
1812 return mod_list->pDrmFormatModifiers[0];
1814 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1817 for (unsigned i = 0; i < mod_count; ++i) {
1818 for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
1819 if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
1821 return mod_list->pDrmFormatModifiers[j];
1825 unreachable("App specified an invalid modifier");
1829 radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
1830 const VkAllocationCallbacks *alloc, VkImage *pImage)
1832 RADV_FROM_HANDLE(radv_device, device, _device);
1833 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1834 uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1835 struct radv_image *image = NULL;
1836 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
1837 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
1838 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
1839 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
1840 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
1841 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1843 unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format);
1845 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1847 radv_assert(pCreateInfo->mipLevels > 0);
1848 radv_assert(pCreateInfo->arrayLayers > 0);
1849 radv_assert(pCreateInfo->samples > 0);
1850 radv_assert(pCreateInfo->extent.width > 0);
1851 radv_assert(pCreateInfo->extent.height > 0);
1852 radv_assert(pCreateInfo->extent.depth > 0);
1855 vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1857 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1859 vk_image_init(&device->vk, &image->vk, pCreateInfo);
1861 image->info.width = pCreateInfo->extent.width;
1862 image->info.height = pCreateInfo->extent.height;
1863 image->info.depth = pCreateInfo->extent.depth;
1864 image->info.samples = pCreateInfo->samples;
1865 image->info.storage_samples = pCreateInfo->samples;
1866 image->info.array_size = pCreateInfo->arrayLayers;
1867 image->info.levels = pCreateInfo->mipLevels;
1868 image->info.num_channels = vk_format_get_nr_components(format);
1870 image->plane_count = vk_format_get_plane_count(format);
1871 image->disjoint = image->plane_count > 1 && pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT;
1873 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1874 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1875 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1876 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1877 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1878 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1880 image->queue_family_mask |= 1u << vk_queue_to_radv(device->physical_device,
1881 pCreateInfo->pQueueFamilyIndices[i]);
1884 const VkExternalMemoryImageCreateInfo *external_info =
1885 vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
1887 image->shareable = external_info;
1888 if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
1889 !(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
1890 pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
1891 image->info.surf_index = &device->image_mrt_offset_counter;
1895 modifier = radv_select_modifier(device, format, mod_list);
1896 else if (explicit_mod)
1897 modifier = explicit_mod->drmFormatModifier;
1899 for (unsigned plane = 0; plane < plane_count; ++plane) {
1900 image->planes[plane].surface.flags =
1901 radv_get_surface_flags(device, image, plane, pCreateInfo, format);
1902 image->planes[plane].surface.modifier = modifier;
1906 external_info && (external_info->handleTypes &
1907 VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1910 *pImage = radv_image_to_handle(image);
1911 assert(!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1915 VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
1916 if (result != VK_SUCCESS) {
1917 radv_destroy_image(device, alloc, image);
1921 if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1922 image->alignment = MAX2(image->alignment, 4096);
1923 image->size = align64(image->size, image->alignment);
1924 image->bindings[0].offset = 0;
1927 device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
1928 RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0,
1929 &image->bindings[0].bo);
1930 if (result != VK_SUCCESS) {
1931 radv_destroy_image(device, alloc, image);
1932 return vk_error(device, result);
1936 if (device->instance->debug_flags & RADV_DEBUG_IMG) {
1937 radv_image_print_info(device, image);
1940 *pImage = radv_image_to_handle(image);
1946 compute_non_block_compressed_view(const struct radv_device *device,
1947 const struct radv_image_view *iview,
1948 struct ac_surf_nbc_view *nbc_view)
1950 const struct radv_image *image = iview->image;
1951 const struct radeon_surf *surf = &image->planes[0].surface;
1952 struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
1954 ac_surface_compute_nbc_view(addrlib, &device->physical_device->rad_info, surf, &image->info,
1955 iview->vk.base_mip_level, iview->vk.base_array_layer, nbc_view);
1959 radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
1960 VkFormat vk_format, const VkComponentMapping *components,
1962 bool is_storage_image, bool disable_compression,
1963 bool enable_compression, unsigned plane_id,
1964 unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags,
1965 const struct ac_surf_nbc_view *nbc_view)
1967 struct radv_image *image = iview->image;
1968 struct radv_image_plane *plane = &image->planes[plane_id];
1969 bool is_stencil = iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT;
1971 union radv_descriptor *descriptor;
1972 uint32_t hw_level = 0;
1974 if (is_storage_image) {
1975 descriptor = &iview->storage_descriptor;
1977 descriptor = &iview->descriptor;
1980 assert(vk_format_get_plane_count(vk_format) == 1);
1981 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1982 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
1983 vk_format_get_blockwidth(vk_format);
1985 if (device->physical_device->rad_info.gfx_level >= GFX9) {
1986 hw_level = iview->vk.base_mip_level;
1987 if (nbc_view->valid) {
1988 hw_level = nbc_view->level;
1989 iview->extent.width = nbc_view->width;
1990 iview->extent.height = nbc_view->height;
1994 radv_make_texture_descriptor(
1995 device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level,
1996 hw_level + iview->vk.level_count - 1, iview->vk.base_array_layer,
1997 iview->vk.base_array_layer + iview->vk.layer_count - 1,
1998 vk_format_get_plane_width(image->vk.format, plane_id, iview->extent.width),
1999 vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height),
2000 iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id],
2001 descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor,
2002 img_create_flags, nbc_view);
2004 const struct legacy_surf_level *base_level_info = NULL;
2005 if (device->physical_device->rad_info.gfx_level <= GFX9) {
2007 base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->vk.base_mip_level];
2009 base_level_info = &plane->surface.u.legacy.level[iview->vk.base_mip_level];
2012 bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
2013 if (is_storage_image && !(enable_write_compression || enable_compression))
2014 disable_compression = true;
2015 si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->vk.base_mip_level,
2016 iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image,
2017 disable_compression, enable_write_compression,
2018 descriptor->plane_descriptors[descriptor_plane_id], nbc_view);
2022 radv_plane_from_aspect(VkImageAspectFlags mask)
2025 case VK_IMAGE_ASPECT_PLANE_1_BIT:
2026 case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
2028 case VK_IMAGE_ASPECT_PLANE_2_BIT:
2029 case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
2031 case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
2039 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
2042 case VK_IMAGE_ASPECT_PLANE_0_BIT:
2043 return image->planes[0].format;
2044 case VK_IMAGE_ASPECT_PLANE_1_BIT:
2045 return image->planes[1].format;
2046 case VK_IMAGE_ASPECT_PLANE_2_BIT:
2047 return image->planes[2].format;
2048 case VK_IMAGE_ASPECT_STENCIL_BIT:
2049 return vk_format_stencil_only(image->vk.format);
2050 case VK_IMAGE_ASPECT_DEPTH_BIT:
2051 return vk_format_depth_only(image->vk.format);
2052 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
2053 return vk_format_depth_only(image->vk.format);
2055 return image->vk.format;
2060 * Determine if the given image view can be fast cleared.
2063 radv_image_view_can_fast_clear(const struct radv_device *device,
2064 const struct radv_image_view *iview)
2066 struct radv_image *image;
2070 image = iview->image;
2072 /* Only fast clear if the image itself can be fast cleared. */
2073 if (!radv_image_can_fast_clear(device, image))
2076 /* Only fast clear if all layers are bound. */
2077 if (iview->vk.base_array_layer > 0 || iview->vk.layer_count != image->info.array_size)
2080 /* Only fast clear if the view covers the whole image. */
2081 if (!radv_image_extent_compare(image, &iview->extent))
2088 radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
2089 const VkImageViewCreateInfo *pCreateInfo,
2090 VkImageCreateFlags img_create_flags,
2091 const struct radv_image_view_extra_create_info *extra_create_info)
2093 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
2094 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
2095 uint32_t plane_count = 1;
2096 float min_lod = 0.0f;
2097 struct ac_surf_nbc_view nbc_view = {0};
2099 const struct VkImageViewMinLodCreateInfoEXT *min_lod_info =
2100 vk_find_struct_const(pCreateInfo->pNext, IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT);
2103 min_lod = min_lod_info->minLod;
2105 bool from_client = extra_create_info && extra_create_info->from_client;
2106 vk_image_view_init(&device->vk, &iview->vk, !from_client, pCreateInfo);
2108 switch (image->vk.image_type) {
2109 case VK_IMAGE_TYPE_1D:
2110 case VK_IMAGE_TYPE_2D:
2111 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
2112 image->info.array_size);
2114 case VK_IMAGE_TYPE_3D:
2115 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
2116 radv_minify(image->info.depth, range->baseMipLevel));
2119 unreachable("bad VkImageType");
2121 iview->image = image;
2122 iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
2124 /* If the image has an Android external format, pCreateInfo->format will be
2125 * VK_FORMAT_UNDEFINED. */
2126 if (iview->vk.format == VK_FORMAT_UNDEFINED) {
2127 iview->vk.format = image->vk.format;
2128 iview->vk.view_format = image->vk.format;
2131 /* Split out the right aspect. Note that for internal meta code we sometimes
2132 * use an equivalent color format for the aspect so we first have to check
2133 * if we actually got depth/stencil formats. */
2134 if (iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
2135 if (vk_format_has_stencil(iview->vk.view_format))
2136 iview->vk.view_format = vk_format_stencil_only(iview->vk.view_format);
2137 } else if (iview->vk.aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
2138 if (vk_format_has_depth(iview->vk.view_format))
2139 iview->vk.view_format = vk_format_depth_only(iview->vk.view_format);
2142 if (vk_format_get_plane_count(image->vk.format) > 1 &&
2143 pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
2144 plane_count = vk_format_get_plane_count(iview->vk.format);
2147 if (device->physical_device->emulate_etc2 &&
2148 vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
2149 const struct util_format_description *desc = vk_format_description(iview->vk.format);
2150 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
2151 iview->plane_id = 1;
2152 iview->vk.view_format = etc2_emulation_format(iview->vk.format);
2153 iview->vk.format = etc2_emulation_format(iview->vk.format);
2159 if (device->physical_device->rad_info.gfx_level >= GFX9) {
2160 iview->extent = (VkExtent3D){
2161 .width = image->info.width,
2162 .height = image->info.height,
2163 .depth = image->info.depth,
2166 iview->extent = iview->vk.extent;
2169 if (iview->vk.format != image->planes[iview->plane_id].format) {
2170 unsigned view_bw = vk_format_get_blockwidth(iview->vk.format);
2171 unsigned view_bh = vk_format_get_blockheight(iview->vk.format);
2172 unsigned img_bw = vk_format_get_blockwidth(image->planes[iview->plane_id].format);
2173 unsigned img_bh = vk_format_get_blockheight(image->planes[iview->plane_id].format);
2175 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
2176 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
2178 /* Comment ported from amdvlk -
2179 * If we have the following image:
2180 * Uncompressed pixels Compressed block sizes (4x4)
2181 * mip0: 22 x 22 6 x 6
2182 * mip1: 11 x 11 3 x 3
2187 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
2188 * the HW is calculating the degradation of the block sizes down the mip-chain as follows
2189 * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1
2191 * This means that mip2 will be missing texels.
2193 * Fix this by calculating the base mip's width and height, then convert
2194 * that, and round it back up to get the level 0 size. Clamp the
2195 * converted size between the original values, and the physical extent
2196 * of the base mipmap.
2198 * On GFX10 we have to take care to not go over the physical extent
2199 * of the base mipmap as otherwise the GPU computes a different layout.
2200 * Note that the GPU does use the same base-mip dimensions for both a
2201 * block compatible format and the compressed format, so even if we take
2202 * the plain converted dimensions the physical layout is correct.
2204 if (device->physical_device->rad_info.gfx_level >= GFX9 &&
2205 vk_format_is_block_compressed(image->vk.format) &&
2206 !vk_format_is_block_compressed(iview->vk.format)) {
2207 /* If we have multiple levels in the view we should ideally take the last level,
2208 * but the mip calculation has a max(..., 1) so walking back to the base mip in an
2209 * useful way is hard. */
2210 if (iview->vk.level_count > 1) {
2211 iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
2212 iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
2214 unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
2215 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
2217 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
2218 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
2220 iview->extent.width = CLAMP(lvl_width << range->baseMipLevel, iview->extent.width,
2221 iview->image->planes[0].surface.u.gfx9.base_mip_width);
2222 iview->extent.height = CLAMP(lvl_height << range->baseMipLevel, iview->extent.height,
2223 iview->image->planes[0].surface.u.gfx9.base_mip_height);
2225 /* If the hardware-computed extent is still be too small, on GFX10
2226 * we can attempt another workaround provided by addrlib that
2227 * changes the descriptor's base level, and adjusts the address and
2228 * extents accordingly.
2230 if (device->physical_device->rad_info.gfx_level >= GFX10 &&
2231 (radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width ||
2232 radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) &&
2233 iview->vk.layer_count == 1) {
2234 compute_non_block_compressed_view(device, iview, &nbc_view);
2240 iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);
2241 iview->disable_dcc_mrt = extra_create_info ? extra_create_info->disable_dcc_mrt : false;
2243 bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
2244 bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
2245 for (unsigned i = 0; i < plane_count; ++i) {
2246 VkFormat format = vk_format_get_plane_format(iview->vk.view_format, i);
2247 radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, false,
2248 disable_compression, enable_compression, iview->plane_id + i,
2249 i, img_create_flags, &nbc_view);
2250 radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, true,
2251 disable_compression, enable_compression, iview->plane_id + i,
2252 i, img_create_flags, &nbc_view);
2257 radv_image_view_finish(struct radv_image_view *iview)
2259 vk_image_view_finish(&iview->vk);
2263 radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
2264 VkImageLayout layout, unsigned queue_mask)
2267 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
2268 case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
2269 case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
2270 case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
2271 return radv_image_has_htile(image);
2272 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
2273 return radv_image_is_tc_compat_htile(image) ||
2274 (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
2275 case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
2276 case VK_IMAGE_LAYOUT_GENERAL:
2277 /* It should be safe to enable TC-compat HTILE with
2278 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
2279 * if the image doesn't have the storage bit set. This
2280 * improves performance for apps that use GENERAL for the main
2281 * depth pass because this allows compression and this reduces
2282 * the number of decompressions from/to GENERAL.
2284 /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
2285 * queue is likely broken for eg. depth/stencil copies.
2287 if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
2288 !device->instance->disable_tc_compat_htile_in_general) {
2293 case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
2294 /* Do not compress HTILE with feedback loops because we can't read&write it without
2295 * introducing corruption.
2298 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
2299 if (radv_image_is_tc_compat_htile(image) ||
2300 (radv_image_has_htile(image) &&
2301 !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
2302 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
2303 /* Keep HTILE compressed if the image is only going to
2304 * be used as a depth/stencil read-only attachment.
2312 return radv_image_is_tc_compat_htile(image);
2317 radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2318 unsigned level, VkImageLayout layout,
2319 unsigned queue_mask)
2321 if (radv_dcc_enabled(image, level) &&
2322 !radv_layout_dcc_compressed(device, image, level, layout, queue_mask))
2325 if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
2328 if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
2329 layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
2332 /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
2333 * images can only be fast-cleared if comp-to-single is supported because we don't yet support
2334 * FCE on the compute queue.
2336 return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
2340 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2341 unsigned level, VkImageLayout layout, unsigned queue_mask)
2343 if (!radv_dcc_enabled(image, level))
2346 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
2347 queue_mask & (1u << RADV_QUEUE_FOREIGN))
2350 /* If the image is read-only, we can always just keep it compressed */
2351 if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
2354 /* Don't compress compute transfer dst when image stores are not supported. */
2355 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2356 (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
2359 if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
2360 /* Do not compress DCC with feedback loops because we can't read&write it without introducing
2366 return device->physical_device->rad_info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
2370 radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2371 VkImageLayout layout, unsigned queue_mask)
2373 if (!radv_image_has_fmask(image))
2376 /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
2379 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2380 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
2383 /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
2384 return layout != VK_IMAGE_LAYOUT_GENERAL &&
2385 (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
2389 radv_image_queue_family_mask(const struct radv_image *image,
2390 enum radv_queue_family family,
2391 enum radv_queue_family queue_family)
2393 if (!image->exclusive)
2394 return image->queue_family_mask;
2395 if (family == RADV_QUEUE_FOREIGN)
2396 return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
2397 if (family == RADV_QUEUE_IGNORED)
2398 return 1u << queue_family;
2399 return 1u << family;
2402 VKAPI_ATTR VkResult VKAPI_CALL
2403 radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo,
2404 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
2407 const VkNativeBufferANDROID *gralloc_info =
2408 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
2411 return radv_image_from_gralloc(_device, pCreateInfo, gralloc_info, pAllocator, pImage);
2414 #ifdef RADV_USE_WSI_PLATFORM
2415 /* Ignore swapchain creation info on Android. Since we don't have an implementation in Mesa,
2416 * we're guaranteed to access an Android object incorrectly.
2418 RADV_FROM_HANDLE(radv_device, device, _device);
2419 const VkImageSwapchainCreateInfoKHR *swapchain_info =
2420 vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
2421 if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
2422 return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device,
2424 swapchain_info->swapchain,
2429 const struct wsi_image_create_info *wsi_info =
2430 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
2431 bool scanout = wsi_info && wsi_info->scanout;
2432 bool prime_blit_src = wsi_info && wsi_info->buffer_blit_src;
2434 return radv_image_create(_device,
2435 &(struct radv_image_create_info){
2436 .vk_info = pCreateInfo,
2438 .prime_blit_src = prime_blit_src,
2440 pAllocator, pImage);
2443 VKAPI_ATTR void VKAPI_CALL
2444 radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
2446 RADV_FROM_HANDLE(radv_device, device, _device);
2447 RADV_FROM_HANDLE(radv_image, image, _image);
2452 radv_destroy_image(device, pAllocator, image);
2455 VKAPI_ATTR void VKAPI_CALL
2456 radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
2457 const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
2459 RADV_FROM_HANDLE(radv_image, image, _image);
2460 RADV_FROM_HANDLE(radv_device, device, _device);
2461 int level = pSubresource->mipLevel;
2462 int layer = pSubresource->arrayLayer;
2464 unsigned plane_id = 0;
2465 if (vk_format_get_plane_count(image->vk.format) > 1)
2466 plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
2468 struct radv_image_plane *plane = &image->planes[plane_id];
2469 struct radeon_surf *surface = &plane->surface;
2471 if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
2472 unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
2477 pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
2478 surface, mem_plane_id, 0);
2479 pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level,
2480 surface, mem_plane_id, level);
2481 pLayout->arrayPitch = 0;
2482 pLayout->depthPitch = 0;
2483 pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
2484 } else if (device->physical_device->rad_info.gfx_level >= GFX9) {
2485 uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
2487 pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
2488 &plane->surface, 0, layer) +
2490 if (image->vk.format == VK_FORMAT_R32G32B32_UINT ||
2491 image->vk.format == VK_FORMAT_R32G32B32_SINT ||
2492 image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
2493 /* Adjust the number of bytes between each row because
2494 * the pitch is actually the number of components per
2497 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
2500 surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
2502 assert(util_is_power_of_two_nonzero(surface->bpe));
2503 pLayout->rowPitch = pitch * surface->bpe;
2506 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
2507 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
2508 pLayout->size = surface->u.gfx9.surf_slice_size;
2509 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
2510 pLayout->size *= u_minify(image->info.depth, level);
2512 pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
2513 (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
2514 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
2515 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2516 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2517 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
2518 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
2519 pLayout->size *= u_minify(image->info.depth, level);
2523 VKAPI_ATTR VkResult VKAPI_CALL
2524 radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
2525 VkImageDrmFormatModifierPropertiesEXT *pProperties)
2527 RADV_FROM_HANDLE(radv_image, image, _image);
2529 pProperties->drmFormatModifier = image->planes[0].surface.modifier;
2533 VKAPI_ATTR VkResult VKAPI_CALL
2534 radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
2535 const VkAllocationCallbacks *pAllocator, VkImageView *pView)
2537 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
2538 RADV_FROM_HANDLE(radv_device, device, _device);
2539 struct radv_image_view *view;
2542 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2544 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2546 radv_image_view_init(view, device, pCreateInfo, image->vk.create_flags,
2547 &(struct radv_image_view_extra_create_info){.from_client = true});
2549 *pView = radv_image_view_to_handle(view);
2554 VKAPI_ATTR void VKAPI_CALL
2555 radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
2557 RADV_FROM_HANDLE(radv_device, device, _device);
2558 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
2563 radv_image_view_finish(iview);
2564 vk_free2(&device->vk.alloc, pAllocator, iview);
2568 radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2569 const VkBufferViewCreateInfo *pCreateInfo)
2571 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
2572 uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset;
2574 vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);
2576 view->bo = buffer->bo;
2577 view->range = vk_buffer_range(&buffer->vk, pCreateInfo->offset, pCreateInfo->range);
2579 radv_make_texel_buffer_descriptor(device, va, pCreateInfo->format, pCreateInfo->offset,
2580 view->range, view->state);
2584 radv_buffer_view_finish(struct radv_buffer_view *view)
2586 vk_object_base_finish(&view->base);
2589 VKAPI_ATTR VkResult VKAPI_CALL
2590 radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
2591 const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
2593 RADV_FROM_HANDLE(radv_device, device, _device);
2594 struct radv_buffer_view *view;
2597 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2599 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2601 radv_buffer_view_init(view, device, pCreateInfo);
2603 *pView = radv_buffer_view_to_handle(view);
2608 VKAPI_ATTR void VKAPI_CALL
2609 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
2610 const VkAllocationCallbacks *pAllocator)
2612 RADV_FROM_HANDLE(radv_device, device, _device);
2613 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
2618 radv_buffer_view_finish(view);
2619 vk_free2(&device->vk.alloc, pAllocator, view);