tu: Implement sampling the fragment density map
author Connor Abbott <cwabbott0@gmail.com>
Mon, 21 Nov 2022 13:34:31 +0000 (14:34 +0100)
committer Marge Bot <emma+marge@anholt.net>
Mon, 8 May 2023 19:59:26 +0000 (19:59 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20304>

src/freedreno/vulkan/tu_cmd_buffer.cc
src/freedreno/vulkan/tu_common.h
src/freedreno/vulkan/tu_device.cc
src/freedreno/vulkan/tu_formats.cc
src/freedreno/vulkan/tu_image.cc
src/freedreno/vulkan/tu_image.h

index 9e6bb57..dc9a633 100644 (file)
@@ -3892,7 +3892,8 @@ static enum tu_stage
 vk2tu_single_stage(VkPipelineStageFlags2 vk_stage, bool dst)
 {
    if (vk_stage == VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT ||
-       vk_stage == VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
+       vk_stage == VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT ||
+       vk_stage == VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT)
       return TU_STAGE_CP;
 
    if (vk_stage == VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT ||
index 58ff178..48ec650 100644 (file)
 #define A6XX_TEX_CONST_DWORDS 16
 #define A6XX_TEX_SAMP_DWORDS 4
 
+/* We sample the fragment density map on the CPU, so technically the
+ * minimum/maximum texel size is arbitrary. However sizes smaller than the
+ * minimum tile width alignment of 32 are likely pointless, so we use that as
+ * the minimum value. For the maximum just pick a value larger than anyone
+ * would reasonably need.
+ */
+#define MIN_FDM_TEXEL_SIZE_LOG2 5
+#define MIN_FDM_TEXEL_SIZE (1u << MIN_FDM_TEXEL_SIZE_LOG2)
+#define MAX_FDM_TEXEL_SIZE_LOG2 10
+#define MAX_FDM_TEXEL_SIZE (1u << MAX_FDM_TEXEL_SIZE_LOG2)
+
 #define TU_FROM_HANDLE(__tu_type, __name, __handle)                          \
    VK_FROM_HANDLE(__tu_type, __name, __handle)
 
index 9bc016f..11a1070 100644 (file)
@@ -2870,10 +2870,12 @@ tu_BindBufferMemory2(VkDevice device,
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL
-tu_BindImageMemory2(VkDevice device,
+tu_BindImageMemory2(VkDevice _device,
                     uint32_t bindInfoCount,
                     const VkBindImageMemoryInfo *pBindInfos)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
+
    for (uint32_t i = 0; i < bindInfoCount; ++i) {
       TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
       TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
@@ -2881,8 +2883,21 @@ tu_BindImageMemory2(VkDevice device,
       if (mem) {
          image->bo = mem->bo;
          image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
+
+         if (image->vk.usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) {
+            if (!mem->bo->map) {
+               VkResult result = tu_bo_map(device, mem->bo);
+               if (result != VK_SUCCESS)
+                  return result;
+            }
+
+            image->map = (char *)mem->bo->map + pBindInfos[i].memoryOffset;
+         } else {
+            image->map = NULL;
+         }
       } else {
          image->bo = NULL;
+         image->map = NULL;
          image->iova = 0;
       }
    }
index 9c2c28b..bc38b26 100644 (file)
@@ -258,6 +258,17 @@ tu_physical_device_get_format_properties(
          if (physical_device->vk.supported_extensions.EXT_filter_cubic)
             optimal |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT;
       }
+
+      /* We sample on the CPU so we can technically support anything as long
+       * as it's floating point, but this restricts it to "reasonable" formats
+       * to use, which means two channels and not something weird like
+       * luminance-alpha.
+       */
+      if (util_format_is_float(format) &&
+          desc->nr_channels == 2 && desc->swizzle[0] == PIPE_SWIZZLE_X &&
+          desc->swizzle[1] == PIPE_SWIZZLE_Y) {
+         optimal |= VK_FORMAT_FEATURE_FRAGMENT_DENSITY_MAP_BIT_EXT;
+      }
    }
 
    if (supported_color) {
index 69fe9e3..ef98afd 100644 (file)
@@ -207,6 +207,9 @@ tu_image_view_init(struct tu_device *device,
       layouts[2] = &image->layout[2];
    }
 
+   vk_component_mapping_to_pipe_swizzle(pCreateInfo->components,
+                                        iview->swizzle);
+
    struct fdl_view_args args = {};
    args.iova = image->iova;
    args.base_array_layer = range->baseArrayLayer;
@@ -457,6 +460,14 @@ tu_image_init(struct tu_device *device, struct tu_image *image,
       ubwc_enabled = false;
    }
 
+   /* Fragment density maps are sampled on the CPU and we don't support
+    * sampling tiled images on the CPU or UBWC at the moment.
+    */
+   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) {
+      tile_mode = TILE6_LINEAR;
+      ubwc_enabled = false;
+   }
+
    enum pipe_format format =
       tu_vk_format_to_pipe_format(image->vk.format);
    /* Whether a view of the image with an R8G8 format could be made. */
@@ -942,3 +953,38 @@ tu_DestroyBufferView(VkDevice _device,
 
    vk_object_free(&device->vk, pAllocator, view);
 }
+
+/* Implements the operations described in "Fragment Density Map Operations."
+ *
+ * Locates the density map texel covering (x, y) in the CPU mapping of the
+ * image (fdm->image->map) and, for `layers` consecutive layers that are
+ * fdm->view.layer_size bytes apart, unpacks that texel, passes it through
+ * pipe_swizzle_4f() with fdm->swizzle, and stores the reciprocals in
+ * areas[layer]: width = 1 / density[0], height = 1 / density[1].
+ *
+ * The log2 texel size per axis is ceil(log2(ceil(width / fdm extent))),
+ * clamped to [MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2]; x and y are
+ * shifted right by it to get the texel column and row. width/height are
+ * presumably the framebuffer dimensions -- TODO confirm against the caller.
+ *
+ * Requirements visible here: the image must be linear (asserted), and
+ * fdm->image->map is used without a NULL check. The texel column/row are
+ * not bounds-checked against the view extent, and a zero density is not
+ * special-cased before the division. `areas` must have room for `layers`
+ * entries.
+ *
+ * NOTE(review): the loop counter `i` shadows the texel-column `i` declared
+ * above it; this is harmless because `pixel` is computed before the loop.
+ */
+void
+tu_fragment_density_map_sample(const struct tu_image_view *fdm,
+                               uint32_t x, uint32_t y,
+                               uint32_t width, uint32_t height,
+                               uint32_t layers,
+                               struct tu_frag_area *areas)
+{
+   assert(fdm->image->layout[0].tile_mode == TILE6_LINEAR);
+
+   uint32_t fdm_shift_x = util_logbase2_ceil(DIV_ROUND_UP(width, fdm->vk.extent.width));
+   uint32_t fdm_shift_y = util_logbase2_ceil(DIV_ROUND_UP(height, fdm->vk.extent.height));
+
+   fdm_shift_x = CLAMP(fdm_shift_x, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
+   fdm_shift_y = CLAMP(fdm_shift_y, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
+
+   uint32_t i = x >> fdm_shift_x;
+   uint32_t j = y >> fdm_shift_y;
+
+   unsigned cpp = fdm->image->layout[0].cpp;
+   unsigned pitch = fdm->view.pitch;
+
+   void *pixel = (char *)fdm->image->map + fdm->view.offset + cpp * i + pitch * j;
+   for (unsigned i = 0; i < layers; i++) {
+      float density_src[4], density[4];
+      util_format_unpack_rgba(fdm->view.format, density_src, pixel, 1);
+      pipe_swizzle_4f(density, density_src, fdm->swizzle);
+      areas[i].width = 1.0f / density[0];
+      areas[i].height = 1.0f / density[1];
+
+      pixel = (char *)pixel + fdm->view.layer_size;
+   }
+}
index fb22550..fbcb244 100644 (file)
@@ -34,6 +34,9 @@ struct tu_image
    struct tu_bo *bo;
    uint64_t iova;
 
+   /* For fragment density map */
+   void *map;
+
    uint32_t lrz_height;
    uint32_t lrz_pitch;
    uint32_t lrz_offset;
@@ -50,6 +53,8 @@ struct tu_image_view
 
    struct fdl6_view view;
 
+   unsigned char swizzle[4];
+
    /* for d32s8 separate depth */
    uint64_t depth_base_addr;
    uint32_t depth_layer_size;
@@ -115,4 +120,15 @@ tu_buffer_view_init(struct tu_buffer_view *view,
                     struct tu_device *device,
                     const VkBufferViewCreateInfo *pCreateInfo);
 
+struct tu_frag_area {
+   float width; /* set to 1 / density[0] by tu_fragment_density_map_sample() */
+   float height; /* set to 1 / density[1] by tu_fragment_density_map_sample() */
+};
+
+void
+tu_fragment_density_map_sample(const struct tu_image_view *fdm,
+                               uint32_t x, uint32_t y,
+                               uint32_t width, uint32_t height,
+                               uint32_t layers, struct tu_frag_area *areas);
+
 #endif /* TU_IMAGE_H */