From 3572f5cd7e273f101fa36c98dc73b52d5b6be567 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 13 Jul 2023 16:13:29 -0500 Subject: [PATCH] nvk: add support for D32_SFLOAT_S8_UINT Part-of: --- src/nouveau/vulkan/nvk_cmd_copy.c | 68 +++++++++++++++++++++++++++++++++++++++ src/nouveau/vulkan/nvk_image.c | 43 ++++++++++++++++++++++--- src/nouveau/vulkan/nvk_image.h | 8 +++++ 3 files changed, 115 insertions(+), 4 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_copy.c b/src/nouveau/vulkan/nvk_cmd_copy.c index e895ef1..6d85b88 100644 --- a/src/nouveau/vulkan/nvk_cmd_copy.c +++ b/src/nouveau/vulkan/nvk_cmd_copy.c @@ -379,8 +379,40 @@ nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, .extent_el = nil_extent4d_px_to_el(extent4d_px, dst->planes[plane].nil.format, dst->planes[plane].nil.sample_layout), }; + struct nouveau_copy copy2 = { 0 }; switch (dst->vk.format) { + case VK_FORMAT_D32_SFLOAT_S8_UINT: + if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { + copy.remap.comp_size = 4; + copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X; + copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE; + copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE; + copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE; + //copy.dst.bpp = 8; + } else { + assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); + copy2.dst = copy.dst; + copy2.extent_el = copy.extent_el; + copy.dst = copy2.src = + nouveau_copy_rect_image(dst, &dst->stencil_copy_temp, + region->imageOffset, + &region->imageSubresource); + + copy.remap.comp_size = 1; + copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X; + copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE; + copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE; + copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE; + + copy2.remap.comp_size = 2; + copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE; + copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE; + 
copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X; + copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE; + //copy2.dst.bpp = 8; + } + break; case VK_FORMAT_D24_UNORM_S8_UINT: if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { copy.remap.comp_size = 1; @@ -403,6 +435,8 @@ nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, } nouveau_copy_rect(cmd, &copy); + if (copy2.extent_el.w > 0) + nouveau_copy_rect(cmd, &copy2); vk_foreach_struct_const(ext, region->pNext) { switch (ext->sType) { @@ -452,8 +486,40 @@ nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[plane].nil.format, src->planes[plane].nil.sample_layout), }; + struct nouveau_copy copy2 = { 0 }; switch (src->vk.format) { + case VK_FORMAT_D32_SFLOAT_S8_UINT: + if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { + copy.remap.comp_size = 4; + copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X; + copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE; + copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE; + copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE; + //copy.src.bpp = 8; + } else { + assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); + copy2.dst = copy.dst; + copy2.extent_el = copy.extent_el; + copy.dst = copy2.src = + nouveau_copy_rect_image(src, &src->stencil_copy_temp, + region->imageOffset, + &region->imageSubresource); + + copy.remap.comp_size = 2; + copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z; + copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE; + copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE; + copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE; + //copy.src.bpp = 8; + + copy2.remap.comp_size = 1; + copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X; + copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE; + copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE; + copy2.remap.dst[3] = 
NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE; + } + break; case VK_FORMAT_D24_UNORM_S8_UINT: if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { copy.remap.comp_size = 1; @@ -476,6 +542,8 @@ nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, } nouveau_copy_rect(cmd, &copy); + if (copy2.extent_el.w > 0) + nouveau_copy_rect(cmd, &copy2); vk_foreach_struct_const(ext, region->pNext) { switch (ext->sType) { diff --git a/src/nouveau/vulkan/nvk_image.c b/src/nouveau/vulkan/nvk_image.c index 1853b1a..eed1e5d 100644 --- a/src/nouveau/vulkan/nvk_image.c +++ b/src/nouveau/vulkan/nvk_image.c @@ -54,9 +54,6 @@ nvk_get_image_format_features(struct nvk_physical_device *pdev, } if (vk_format_is_depth_or_stencil(vk_format)) { - if (vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) - return 0; /* TODO */ - if (!nil_format_supports_depth_stencil(&pdev->info, p_format)) return 0; @@ -281,6 +278,27 @@ nvk_image_init(struct nvk_device *dev, assert(ok); } + if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + struct nil_image_init_info stencil_nil_info = { + .dim = vk_image_type_to_nil_dim(pCreateInfo->imageType), + .format = PIPE_FORMAT_R32_UINT, + .extent_px = { + .w = pCreateInfo->extent.width, + .h = pCreateInfo->extent.height, + .d = pCreateInfo->extent.depth, + .a = pCreateInfo->arrayLayers, + }, + .levels = pCreateInfo->mipLevels, + .samples = pCreateInfo->samples, + .usage = usage, + }; + + ASSERTED bool ok = nil_image_init(&nvk_device_physical(dev)->info, + &image->stencil_copy_temp.nil, + &stencil_nil_info); + assert(ok); + } + return VK_SUCCESS; } @@ -293,6 +311,9 @@ nvk_image_finish(struct nvk_device *dev, struct nvk_image *image, nvk_free_memory(dev, image->planes[plane].internal, pAllocator); } + if (image->stencil_copy_temp.internal) + nvk_free_memory(dev, image->stencil_copy_temp.internal, pAllocator); + vk_image_finish(&image->vk); } @@ -357,6 +378,15 @@ nvk_CreateImage(VkDevice device, } } + if (image->stencil_copy_temp.nil.size_B > 0) { + result = nvk_image_plane_alloc_internal(dev, 
&image->stencil_copy_temp, + pAllocator); + if (result != VK_SUCCESS) { + nvk_image_finish(dev, image, pAllocator); + vk_free2(&dev->vk.alloc, pAllocator, image); + return result; + } + } *pImage = nvk_image_to_handle(image); @@ -374,7 +404,6 @@ nvk_DestroyImage(VkDevice device, if (!image) return; - nvk_image_finish(dev, image, pAllocator); vk_free2(&dev->vk.alloc, pAllocator, image); } @@ -418,6 +447,9 @@ nvk_GetImageMemoryRequirements2(VkDevice device, nvk_image_plane_add_req(&image->planes[plane], &size_B, &align_B); } + if (image->stencil_copy_temp.nil.size_B > 0) + nvk_image_plane_add_req(&image->stencil_copy_temp, &size_B, &align_B); + pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types; pMemoryRequirements->memoryRequirements.alignment = align_B; pMemoryRequirements->memoryRequirements.size = size_B; @@ -533,6 +565,9 @@ nvk_BindImageMemory2(VkDevice device, nvk_image_plane_bind(&image->planes[plane], mem, &offset_B); } } + + if (image->stencil_copy_temp.nil.size_B > 0) + nvk_image_plane_bind(&image->stencil_copy_temp, mem, &offset_B); } return VK_SUCCESS; diff --git a/src/nouveau/vulkan/nvk_image.h b/src/nouveau/vulkan/nvk_image.h index f243431..2e731970 100644 --- a/src/nouveau/vulkan/nvk_image.h +++ b/src/nouveau/vulkan/nvk_image.h @@ -34,6 +34,14 @@ struct nvk_image { uint8_t plane_count; struct nvk_image_plane planes[3]; + + /* In order to support D32_SFLOAT_S8_UINT, a temp area is + * needed. The stencil plane can't be copied using the DMA + * engine in a single pass since it would need 8-component support. + * Instead we allocate a 16-bit temp, that gets copied into, then + * copied again down to the 8-bit result. + */ + struct nvk_image_plane stencil_copy_temp; }; VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) -- 2.7.4