nvk: add support for D32_SFLOAT_S8_UINT
author Dave Airlie <airlied@redhat.com>
Thu, 13 Jul 2023 21:13:29 +0000 (16:13 -0500)
committer Marge Bot <emma+marge@anholt.net>
Fri, 4 Aug 2023 21:32:05 +0000 (21:32 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>

src/nouveau/vulkan/nvk_cmd_copy.c
src/nouveau/vulkan/nvk_image.c
src/nouveau/vulkan/nvk_image.h

index e895ef1..6d85b88 100644 (file)
@@ -379,8 +379,40 @@ nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
          .extent_el = nil_extent4d_px_to_el(extent4d_px, dst->planes[plane].nil.format,
                                             dst->planes[plane].nil.sample_layout),
       };
+      struct nouveau_copy copy2 = { 0 };
 
       switch (dst->vk.format) {
+      case VK_FORMAT_D32_SFLOAT_S8_UINT:
+         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+            copy.remap.comp_size = 4;
+            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
+            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
+            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
+            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
+            //copy.dst.bpp = 8;
+         } else {
+            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
+            copy2.dst = copy.dst;
+            copy2.extent_el = copy.extent_el;
+            copy.dst = copy2.src =
+               nouveau_copy_rect_image(dst, &dst->stencil_copy_temp,
+                                       region->imageOffset,
+                                       &region->imageSubresource);
+
+            copy.remap.comp_size = 1;
+            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
+            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
+            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
+            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
+
+            copy2.remap.comp_size = 2;
+            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
+            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
+            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X;
+            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
+            //copy2.dst.bpp = 8;
+         }
+         break;
       case VK_FORMAT_D24_UNORM_S8_UINT:
          if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
             copy.remap.comp_size = 1;
@@ -403,6 +435,8 @@ nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
       }
 
       nouveau_copy_rect(cmd, &copy);
+      if (copy2.extent_el.w > 0)
+         nouveau_copy_rect(cmd, &copy2);
 
       vk_foreach_struct_const(ext, region->pNext) {
          switch (ext->sType) {
@@ -452,8 +486,40 @@ nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
          .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[plane].nil.format,
                                             src->planes[plane].nil.sample_layout),
       };
+      struct nouveau_copy copy2 = { 0 };
 
       switch (src->vk.format) {
+      case VK_FORMAT_D32_SFLOAT_S8_UINT:
+         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+            copy.remap.comp_size = 4;
+            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
+            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
+            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
+            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
+            //copy.src.bpp = 8;
+         } else {
+            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
+            copy2.dst = copy.dst;
+            copy2.extent_el = copy.extent_el;
+            copy.dst = copy2.src =
+               nouveau_copy_rect_image(src, &src->stencil_copy_temp,
+                                       region->imageOffset,
+                                       &region->imageSubresource);
+
+            copy.remap.comp_size = 2;
+            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z;
+            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
+            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
+            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
+            //copy.src.bpp = 8;
+
+            copy2.remap.comp_size = 1;
+            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
+            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
+            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
+            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
+         }
+         break;
       case VK_FORMAT_D24_UNORM_S8_UINT:
          if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
             copy.remap.comp_size = 1;
@@ -476,6 +542,8 @@ nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
       }
 
       nouveau_copy_rect(cmd, &copy);
+      if (copy2.extent_el.w > 0)
+         nouveau_copy_rect(cmd, &copy2);
 
       vk_foreach_struct_const(ext, region->pNext) {
          switch (ext->sType) {
index 1853b1a..eed1e5d 100644 (file)
@@ -54,9 +54,6 @@ nvk_get_image_format_features(struct nvk_physical_device *pdev,
    }
 
    if (vk_format_is_depth_or_stencil(vk_format)) {
-      if (vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
-         return 0; /* TODO */
-
       if (!nil_format_supports_depth_stencil(&pdev->info, p_format))
          return 0;
 
@@ -281,6 +278,27 @@ nvk_image_init(struct nvk_device *dev,
       assert(ok);
    }
 
+   if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+      struct nil_image_init_info stencil_nil_info = {
+         .dim = vk_image_type_to_nil_dim(pCreateInfo->imageType),
+         .format = PIPE_FORMAT_R32_UINT,
+         .extent_px = {
+            .w = pCreateInfo->extent.width,
+            .h = pCreateInfo->extent.height,
+            .d = pCreateInfo->extent.depth,
+            .a = pCreateInfo->arrayLayers,
+         },
+         .levels = pCreateInfo->mipLevels,
+         .samples = pCreateInfo->samples,
+         .usage = usage,
+      };
+
+      ASSERTED bool ok = nil_image_init(&nvk_device_physical(dev)->info,
+                                        &image->stencil_copy_temp.nil,
+                                        &stencil_nil_info);
+      assert(ok);
+   }
+
    return VK_SUCCESS;
 }
 
@@ -293,6 +311,9 @@ nvk_image_finish(struct nvk_device *dev, struct nvk_image *image,
          nvk_free_memory(dev, image->planes[plane].internal, pAllocator);
    }
 
+   if (image->stencil_copy_temp.internal)
+      nvk_free_memory(dev, image->stencil_copy_temp.internal, pAllocator);
+
    vk_image_finish(&image->vk);
 }
 
@@ -357,6 +378,15 @@ nvk_CreateImage(VkDevice device,
       }
    }
 
+   if (image->stencil_copy_temp.nil.size_B > 0) {
+      result = nvk_image_plane_alloc_internal(dev, &image->stencil_copy_temp,
+                                              pAllocator);
+      if (result != VK_SUCCESS) {
+         nvk_image_finish(dev, image, pAllocator);
+         vk_free2(&dev->vk.alloc, pAllocator, image);
+         return result;
+      }
+   }
 
    *pImage = nvk_image_to_handle(image);
 
@@ -374,7 +404,6 @@ nvk_DestroyImage(VkDevice device,
    if (!image)
       return;
 
-
    nvk_image_finish(dev, image, pAllocator);
    vk_free2(&dev->vk.alloc, pAllocator, image);
 }
@@ -418,6 +447,9 @@ nvk_GetImageMemoryRequirements2(VkDevice device,
          nvk_image_plane_add_req(&image->planes[plane], &size_B, &align_B);
    }
 
+   if (image->stencil_copy_temp.nil.size_B > 0)
+      nvk_image_plane_add_req(&image->stencil_copy_temp, &size_B, &align_B);
+
    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
    pMemoryRequirements->memoryRequirements.alignment = align_B;
    pMemoryRequirements->memoryRequirements.size = size_B;
@@ -533,6 +565,9 @@ nvk_BindImageMemory2(VkDevice device,
             nvk_image_plane_bind(&image->planes[plane], mem, &offset_B);
          }
       }
+
+      if (image->stencil_copy_temp.nil.size_B > 0)
+         nvk_image_plane_bind(&image->stencil_copy_temp, mem, &offset_B);
    }
 
    return VK_SUCCESS;
index f243431..2e73197 100644 (file)
@@ -34,6 +34,14 @@ struct nvk_image {
 
    uint8_t plane_count;
    struct nvk_image_plane planes[3];
+
+   /* In order to support D32_SFLOAT_S8_UINT, a temp area is
+    * needed. The stencil plane can't be copied using the DMA
+    * engine in a single pass, since that would need support for
+    * 8 components. Instead we allocate a 16-bit temp that gets
+    * copied into, then copied again down to the 8-bit result.
+    */
+   struct nvk_image_plane stencil_copy_temp;
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)