anv/sparse: add the initial code for Sparse Resources
authorPaulo Zanoni <paulo.r.zanoni@intel.com>
Wed, 19 Apr 2023 00:26:05 +0000 (17:26 -0700)
committerMarge Bot <emma+marge@anholt.net>
Thu, 28 Sep 2023 06:16:40 +0000 (06:16 +0000)
This giant patch implements a huge chunk of the Vulkan Sparse
Resources API. I previously had this as a nice series of many smaller
patches that evolved as the xe.ko added more features, but once I was
asked to squash some of the major reworks I realized I wouldn't be
able to easily rewrite history, so I just squashed basically the whole
series into a giant patch. I may end up splitting this again later if
I find a way to properly do it.

If we want to support the DX12 API through vkd3d we need to support
part of the Sparse Resources API. If we don't, a bunch of Steam
games won't work.

For now we only support the xe.ko backend, but the vast majority of
the code is KMD-independent and so an i915.ko implementation would use
most of what's here, just extending the part that binds and unbinds
memory.

v2+: There's no way to sanely track the version history of this patch
in this commit message. Please refer to Gitlab.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23045>

src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_formats.c
src/intel/vulkan/anv_image.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/anv_sparse.c [new file with mode: 0644]
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/meson.build

index fec72e2..d5c8681 100644 (file)
@@ -1344,23 +1344,106 @@ can_chain_query_pools(struct anv_query_pool *p1, struct anv_query_pool *p2)
 }
 
 static VkResult
-anv_queue_submit_locked(struct anv_queue *queue,
-                        struct vk_queue_submit *submit,
-                        struct anv_utrace_submit *utrace_submit)
+anv_queue_submit_sparse_bind_locked(struct anv_queue *queue,
+                                    struct vk_queue_submit *submit)
 {
+   struct anv_device *device = queue->device;
    VkResult result;
 
-   if (unlikely((submit->buffer_bind_count ||
-                 submit->image_opaque_bind_count ||
-                 submit->image_bind_count))) {
+   /* When fake sparse is enabled, while we do accept creating "sparse"
+    * resources we can't really handle sparse submission. Fake sparse is
+    * supposed to be used by applications that request sparse to be enabled
+    * but don't actually *use* it.
+    */
+   if (!device->physical->has_sparse) {
       if (INTEL_DEBUG(DEBUG_SPARSE))
          fprintf(stderr, "=== application submitting sparse operations: "
                "buffer_bind:%d image_opaque_bind:%d image_bind:%d\n",
                submit->buffer_bind_count, submit->image_opaque_bind_count,
                submit->image_bind_count);
-      fprintf(stderr, "Error: Using sparse operation. Sparse binding not supported.\n");
+      return vk_queue_set_lost(&queue->vk, "Sparse binding not supported");
+   }
+
+   device->using_sparse = true;
+
+   assert(submit->command_buffer_count == 0);
+
+   /* TODO: make both the syncs and signals be passed as part of the vm_bind
+    * ioctl so they can be waited asynchronously. For now this doesn't matter
+    * as we're doing synchronous vm_bind, but later when we make it async this
+    * will make a difference.
+    */
+   result = vk_sync_wait_many(&device->vk, submit->wait_count, submit->waits,
+                              VK_SYNC_WAIT_COMPLETE, INT64_MAX);
+   if (result != VK_SUCCESS)
+      return vk_queue_set_lost(&queue->vk, "vk_sync_wait failed");
+
+   /* Do the binds */
+   for (uint32_t i = 0; i < submit->buffer_bind_count; i++) {
+      VkSparseBufferMemoryBindInfo *bind_info = &submit->buffer_binds[i];
+      ANV_FROM_HANDLE(anv_buffer, buffer, bind_info->buffer);
+
+      assert(anv_buffer_is_sparse(buffer));
+
+      for (uint32_t j = 0; j < bind_info->bindCount; j++) {
+         result = anv_sparse_bind_resource_memory(device,
+                                                  &buffer->sparse_data,
+                                                  &bind_info->pBinds[j]);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   }
+
+   for (uint32_t i = 0; i < submit->image_opaque_bind_count; i++) {
+      VkSparseImageOpaqueMemoryBindInfo *bind_info =
+         &submit->image_opaque_binds[i];
+      ANV_FROM_HANDLE(anv_image, image, bind_info->image);
+
+      assert(anv_image_is_sparse(image));
+      assert(!image->disjoint);
+      struct anv_sparse_binding_data *sparse_data =
+         &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].sparse_data;
+
+      for (uint32_t j = 0; j < bind_info->bindCount; j++) {
+         result = anv_sparse_bind_resource_memory(device, sparse_data,
+                                                  &bind_info->pBinds[j]);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   }
+
+   for (uint32_t i = 0; i < submit->image_bind_count; i++) {
+      VkSparseImageMemoryBindInfo *bind_info = &submit->image_binds[i];
+      ANV_FROM_HANDLE(anv_image, image, bind_info->image);
+
+      assert(anv_image_is_sparse(image));
+      assert(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
+
+      for (uint32_t j = 0; j < bind_info->bindCount; j++) {
+         result = anv_sparse_bind_image_memory(queue, image,
+                                               &bind_info->pBinds[j]);
+         if (result != VK_SUCCESS)
+            return result;
+      }
    }
 
+   for (uint32_t i = 0; i < submit->signal_count; i++) {
+      struct vk_sync_signal *s = &submit->signals[i];
+      result = vk_sync_signal(&device->vk, s->sync, s->signal_value);
+      if (result != VK_SUCCESS)
+         return vk_queue_set_lost(&queue->vk, "vk_sync_signal failed");
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+anv_queue_submit_cmd_buffers_locked(struct anv_queue *queue,
+                                    struct vk_queue_submit *submit,
+                                    struct anv_utrace_submit *utrace_submit)
+{
+   VkResult result;
+
    if (submit->command_buffer_count == 0) {
       result = anv_queue_exec_locked(queue, submit->wait_count, submit->waits,
                                      0 /* cmd_buffer_count */,
@@ -1477,7 +1560,16 @@ anv_queue_submit(struct vk_queue *vk_queue,
    pthread_mutex_lock(&device->mutex);
 
    uint64_t start_ts = intel_ds_begin_submit(&queue->ds);
-   result = anv_queue_submit_locked(queue, submit, utrace_submit);
+
+   if (submit->buffer_bind_count ||
+       submit->image_opaque_bind_count ||
+       submit->image_bind_count) {
+      result = anv_queue_submit_sparse_bind_locked(queue, submit);
+   } else {
+      result = anv_queue_submit_cmd_buffers_locked(queue, submit,
+                                                   utrace_submit);
+   }
+
    /* Take submission ID under lock */
    intel_ds_end_submit(&queue->ds, start_ts);
 
index b83d38b..292dcb6 100644 (file)
@@ -411,6 +411,9 @@ get_features(const struct anv_physical_device *pdevice,
    const bool mesh_shader =
       pdevice->vk.supported_extensions.EXT_mesh_shader;
 
+   const bool has_sparse_or_fake = pdevice->instance->has_fake_sparse ||
+                                   pdevice->has_sparse;
+
    *features = (struct vk_features) {
       /* Vulkan 1.0 */
       .robustBufferAccess                       = true,
@@ -461,17 +464,17 @@ get_features(const struct anv_physical_device *pdevice,
       .shaderFloat64                            = pdevice->info.has_64bit_float,
       .shaderInt64                              = true,
       .shaderInt16                              = true,
-      .shaderResourceResidency                  = pdevice->instance->has_fake_sparse,
       .shaderResourceMinLod                     = true,
-      .sparseBinding                            = pdevice->instance->has_fake_sparse,
-      .sparseResidencyBuffer                    = pdevice->instance->has_fake_sparse,
-      .sparseResidencyImage2D                   = pdevice->instance->has_fake_sparse,
-      .sparseResidencyImage3D                   = pdevice->instance->has_fake_sparse,
+      .shaderResourceResidency                  = has_sparse_or_fake,
+      .sparseBinding                            = has_sparse_or_fake,
+      .sparseResidencyAliased                   = has_sparse_or_fake,
+      .sparseResidencyBuffer                    = has_sparse_or_fake,
+      .sparseResidencyImage2D                   = has_sparse_or_fake,
+      .sparseResidencyImage3D                   = has_sparse_or_fake,
       .sparseResidency2Samples                  = false,
       .sparseResidency4Samples                  = false,
       .sparseResidency8Samples                  = false,
       .sparseResidency16Samples                 = false,
-      .sparseResidencyAliased                   = pdevice->instance->has_fake_sparse,
       .variableMultisampleRate                  = true,
       .inheritedQueries                         = true,
 
@@ -1123,7 +1126,8 @@ static void
 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
 {
    uint32_t family_count = 0;
-   VkQueueFlags sparse_flags = pdevice->instance->has_fake_sparse ?
+   VkQueueFlags sparse_flags = (pdevice->instance->has_fake_sparse ||
+                                pdevice->has_sparse) ?
                                VK_QUEUE_SPARSE_BINDING_BIT : 0;
 
    if (pdevice->engine_info) {
@@ -1393,6 +1397,9 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
 
    device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;
 
+   device->has_sparse = device->info.kmd_type == INTEL_KMD_TYPE_XE &&
+      debug_get_bool_option("ANV_SPARSE", false);
+
    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
       driQueryOptionb(&instance->dri_options, "always_flush_cache");
 
@@ -1668,6 +1675,9 @@ void anv_GetPhysicalDeviceProperties(
    const uint32_t max_workgroup_size =
       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
 
+   const bool has_sparse_or_fake = pdevice->instance->has_fake_sparse ||
+                                   pdevice->has_sparse;
+
    VkSampleCountFlags sample_counts =
       isl_device_get_sample_counts(&pdevice->isl_dev);
 
@@ -1685,7 +1695,7 @@ void anv_GetPhysicalDeviceProperties(
       .maxMemoryAllocationCount                 = UINT32_MAX,
       .maxSamplerAllocationCount                = 64 * 1024,
       .bufferImageGranularity                   = 1,
-      .sparseAddressSpaceSize                   = pdevice->instance->has_fake_sparse ? (1uLL << 48) : 0,
+      .sparseAddressSpaceSize                   = has_sparse_or_fake ? (1uLL << 48) : 0,
       .maxBoundDescriptorSets                   = MAX_SETS,
       .maxPerStageDescriptorSamplers            = max_samplers,
       .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
@@ -1811,11 +1821,11 @@ void anv_GetPhysicalDeviceProperties(
                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
       .limits = limits,
       .sparseProperties = {
-         .residencyStandard2DBlockShape = pdevice->instance->has_fake_sparse,
-         .residencyStandard2DMultisampleBlockShape = pdevice->instance->has_fake_sparse,
-         .residencyStandard3DBlockShape = pdevice->instance->has_fake_sparse,
+         .residencyStandard2DBlockShape = has_sparse_or_fake,
+         .residencyStandard2DMultisampleBlockShape = false,
+         .residencyStandard3DBlockShape = has_sparse_or_fake,
          .residencyAlignedMipSize = false,
-         .residencyNonResidentStrict = pdevice->instance->has_fake_sparse,
+         .residencyNonResidentStrict = has_sparse_or_fake,
       },
    };
 
@@ -4322,6 +4332,7 @@ anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
 
    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
+   assert(!anv_buffer_is_sparse(buffer));
 
    if (mem) {
       assert(pBindInfo->memoryOffset < mem->vk.size);
@@ -4346,22 +4357,6 @@ VkResult anv_BindBufferMemory2(
    return VK_SUCCESS;
 }
 
-VkResult anv_QueueBindSparse(
-    VkQueue                                     _queue,
-    uint32_t                                    bindInfoCount,
-    const VkBindSparseInfo*                     pBindInfo,
-    VkFence                                     fence)
-{
-   ANV_FROM_HANDLE(anv_queue, queue, _queue);
-   if (vk_device_is_lost(&queue->device->vk))
-      return VK_ERROR_DEVICE_LOST;
-
-   if (INTEL_DEBUG(DEBUG_SPARSE))
-      fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
-
-   return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
-}
-
 // Event functions
 
 VkResult anv_CreateEvent(
@@ -4446,6 +4441,7 @@ static void
 anv_get_buffer_memory_requirements(struct anv_device *device,
                                    VkDeviceSize size,
                                    VkBufferUsageFlags usage,
+                                   bool is_sparse,
                                    VkMemoryRequirements2* pMemoryRequirements)
 {
    /* The Vulkan spec (git aaed022) says:
@@ -4463,6 +4459,18 @@ anv_get_buffer_memory_requirements(struct anv_device *device,
     */
    uint32_t alignment = 64;
 
+   /* From the spec, section "Sparse Buffer and Fully-Resident Image Block
+    * Size":
+    *   "The sparse block size in bytes for sparse buffers and fully-resident
+    *    images is reported as VkMemoryRequirements::alignment. alignment
+    *    represents both the memory alignment requirement and the binding
+    *    granularity (in bytes) for sparse resources."
+    */
+   if (is_sparse) {
+      alignment = ANV_SPARSE_BLOCK_SIZE;
+      size = align64(size, alignment);
+   }
+
    pMemoryRequirements->memoryRequirements.size = size;
    pMemoryRequirements->memoryRequirements.alignment = alignment;
 
@@ -4500,17 +4508,21 @@ void anv_GetDeviceBufferMemoryRequirementsKHR(
     VkMemoryRequirements2*                      pMemoryRequirements)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
-
-   if (INTEL_DEBUG(DEBUG_SPARSE) && pInfo->pCreateInfo->flags &
-           (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
-            VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT |
-            VK_BUFFER_CREATE_SPARSE_ALIASED_BIT))
+   const bool is_sparse =
+      pInfo->pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
+
+   if (!device->physical->has_sparse &&
+       INTEL_DEBUG(DEBUG_SPARSE) &&
+       pInfo->pCreateInfo->flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
+                                    VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT |
+                                    VK_BUFFER_CREATE_SPARSE_ALIASED_BIT))
       fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__,
               __LINE__, pInfo->pCreateInfo->flags);
 
    anv_get_buffer_memory_requirements(device,
                                       pInfo->pCreateInfo->size,
                                       pInfo->pCreateInfo->usage,
+                                      is_sparse,
                                       pMemoryRequirements);
 }
 
@@ -4523,10 +4535,11 @@ VkResult anv_CreateBuffer(
    ANV_FROM_HANDLE(anv_device, device, _device);
    struct anv_buffer *buffer;
 
-   if (INTEL_DEBUG(DEBUG_SPARSE) && (pCreateInfo->flags &
-           (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
-            VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT |
-            VK_BUFFER_CREATE_SPARSE_ALIASED_BIT)))
+   if (!device->physical->has_sparse &&
+       INTEL_DEBUG(DEBUG_SPARSE) &&
+       pCreateInfo->flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
+                             VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT |
+                             VK_BUFFER_CREATE_SPARSE_ALIASED_BIT))
       fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__,
               __LINE__, pCreateInfo->flags);
 
@@ -4544,6 +4557,27 @@ VkResult anv_CreateBuffer(
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
    buffer->address = ANV_NULL_ADDRESS;
+   if (anv_buffer_is_sparse(buffer)) {
+      const VkBufferOpaqueCaptureAddressCreateInfo *opaque_addr_info =
+         vk_find_struct_const(pCreateInfo->pNext,
+                              BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
+      enum anv_bo_alloc_flags alloc_flags = 0;
+      uint64_t client_address = 0;
+
+      if (opaque_addr_info) {
+         alloc_flags = ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
+         client_address = opaque_addr_info->opaqueCaptureAddress;
+      }
+
+      VkResult result = anv_init_sparse_bindings(device, buffer->vk.size,
+                                                 &buffer->sparse_data,
+                                                 alloc_flags, client_address,
+                                                 &buffer->address);
+      if (result != VK_SUCCESS) {
+         vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
+         return result;
+      }
+   }
 
    *pBuffer = anv_buffer_to_handle(buffer);
 
@@ -4561,6 +4595,11 @@ void anv_DestroyBuffer(
    if (!buffer)
       return;
 
+   if (anv_buffer_is_sparse(buffer)) {
+      assert(buffer->address.offset == buffer->sparse_data.address);
+      anv_free_sparse_bindings(device, &buffer->sparse_data);
+   }
+
    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
 }
 
@@ -4579,7 +4618,9 @@ uint64_t anv_GetBufferOpaqueCaptureAddress(
     VkDevice                                    device,
     const VkBufferDeviceAddressInfo*            pInfo)
 {
-   return 0;
+   ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
+
+   return anv_address_physical(buffer->address);
 }
 
 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
index 6fd9a89..03d41d2 100644 (file)
@@ -1784,33 +1784,113 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties2(
    return result;
 }
 
-void anv_GetPhysicalDeviceSparseImageFormatProperties(
-    VkPhysicalDevice                            physicalDevice,
-    VkFormat                                    format,
-    VkImageType                                 type,
-    VkSampleCountFlagBits                       samples,
-    VkImageUsageFlags                           usage,
-    VkImageTiling                               tiling,
-    uint32_t*                                   pNumProperties,
-    VkSparseImageFormatProperties*              pProperties)
-{
-   if (INTEL_DEBUG(DEBUG_SPARSE))
-      fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
-   /* Sparse images are not yet supported. */
-   *pNumProperties = 0;
-}
-
 void anv_GetPhysicalDeviceSparseImageFormatProperties2(
     VkPhysicalDevice                            physicalDevice,
     const VkPhysicalDeviceSparseImageFormatInfo2* pFormatInfo,
     uint32_t*                                   pPropertyCount,
     VkSparseImageFormatProperties2*             pProperties)
 {
-   if (INTEL_DEBUG(DEBUG_SPARSE))
-      fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
+   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+   const struct intel_device_info *devinfo = &physical_device->info;
+   VkImageAspectFlags aspects = vk_format_aspects(pFormatInfo->format);
+   VK_OUTARRAY_MAKE_TYPED(VkSparseImageFormatProperties2, props,
+                          pProperties, pPropertyCount);
+
+   if (!physical_device->has_sparse) {
+      if (INTEL_DEBUG(DEBUG_SPARSE))
+         fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
+      return;
+   }
 
-   /* Sparse images are not yet supported. */
-   *pPropertyCount = 0;
+   vk_foreach_struct_const(ext, pFormatInfo->pNext)
+      anv_debug_ignored_stype(ext->sType);
+
+   if (anv_sparse_image_check_support(physical_device,
+                                      VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
+                                      VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT,
+                                      pFormatInfo->tiling,
+                                      pFormatInfo->samples,
+                                      pFormatInfo->type,
+                                      pFormatInfo->format) != VK_SUCCESS) {
+      return;
+   }
+
+   VkExtent3D ds_granularity = {};
+   VkSparseImageFormatProperties2 *ds_props_ptr = NULL;
+
+   u_foreach_bit(b, aspects) {
+      VkImageAspectFlagBits aspect = 1 << b;
+
+      const uint32_t plane =
+         anv_aspect_to_plane(vk_format_aspects(pFormatInfo->format), aspect);
+      struct anv_format_plane anv_format_plane =
+         anv_get_format_plane(devinfo, pFormatInfo->format, plane,
+                              pFormatInfo->tiling);
+      enum isl_format isl_format = anv_format_plane.isl_format;
+      assert(isl_format != ISL_FORMAT_UNSUPPORTED);
+
+      VkImageCreateFlags vk_create_flags =
+         VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
+         VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT;
+
+      isl_surf_usage_flags_t isl_usage =
+         anv_image_choose_isl_surf_usage(vk_create_flags, pFormatInfo->usage,
+                                         0, aspect);
+
+      const enum isl_surf_dim isl_surf_dim =
+         pFormatInfo->type == VK_IMAGE_TYPE_1D ? ISL_SURF_DIM_1D :
+         pFormatInfo->type == VK_IMAGE_TYPE_2D ? ISL_SURF_DIM_2D :
+         ISL_SURF_DIM_3D;
+
+      struct isl_surf isl_surf;
+      bool ok = isl_surf_init(&physical_device->isl_dev, &isl_surf,
+                  .dim = isl_surf_dim,
+                  .format = isl_format,
+                  .width = 1,
+                  .height = 1,
+                  .depth = 1,
+                  .levels = 1,
+                  .array_len = 1,
+                  .samples = pFormatInfo->samples,
+                  .min_alignment_B = 0,
+                  .row_pitch_B = 0,
+                  .usage = isl_usage,
+                  .tiling_flags = ISL_TILING_ANY_MASK);
+      if (!ok) {
+         /* There's no way to return an error code! */
+         assert(false);
+         *pPropertyCount = 0;
+         return;
+      }
+
+      VkSparseImageFormatProperties format_props =
+         anv_sparse_calc_image_format_properties(physical_device, aspect,
+                                                 pFormatInfo->type,
+                                                 &isl_surf);
+
+      /* If both depth and stencil are the same, unify them if possible. */
+      if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT |
+                    VK_IMAGE_ASPECT_STENCIL_BIT)) {
+         if (!ds_props_ptr) {
+            ds_granularity = format_props.imageGranularity;
+         } else if (ds_granularity.width ==
+                    format_props.imageGranularity.width &&
+                    ds_granularity.height ==
+                    format_props.imageGranularity.height &&
+                    ds_granularity.depth ==
+                    format_props.imageGranularity.depth) {
+            ds_props_ptr->properties.aspectMask |= aspect;
+            continue;
+         }
+      }
+
+      vk_outarray_append_typed(VkSparseImageFormatProperties2, &props, p) {
+         p->properties = format_props;
+         if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT |
+                       VK_IMAGE_ASPECT_STENCIL_BIT))
+            ds_props_ptr = p;
+      }
+   }
 }
 
 void anv_GetPhysicalDeviceExternalBufferProperties(
index 5fa0a1a..d8e8220 100644 (file)
@@ -56,8 +56,9 @@ memory_range_end(struct anv_image_memory_range memory_range)
  * Get binding for VkImagePlaneMemoryRequirementsInfo,
  * VkBindImagePlaneMemoryInfo and VkDeviceImageMemoryRequirements.
  */
-static struct anv_image_binding *
-image_aspect_to_binding(struct anv_image *image, VkImageAspectFlags aspect)
+struct anv_image_binding *
+anv_image_aspect_to_binding(struct anv_image *image,
+                            VkImageAspectFlags aspect)
 {
    uint32_t plane = 0;
 
@@ -200,11 +201,11 @@ memory_range_merge(struct anv_image_memory_range *a,
    a->size = MAX2(a->size, b.offset + b.size);
 }
 
-static isl_surf_usage_flags_t
-choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
-                      VkImageUsageFlags vk_usage,
-                      isl_surf_usage_flags_t isl_extra_usage,
-                      VkImageAspectFlagBits aspect)
+isl_surf_usage_flags_t
+anv_image_choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
+                                VkImageUsageFlags vk_usage,
+                                isl_surf_usage_flags_t isl_extra_usage,
+                                VkImageAspectFlagBits aspect)
 {
    isl_surf_usage_flags_t isl_usage = isl_extra_usage;
 
@@ -223,6 +224,10 @@ choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
    if (vk_usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
       isl_usage |= ISL_SURF_USAGE_CPB_BIT;
 
+   if (vk_create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
+      isl_usage |= ISL_SURF_USAGE_SPARSE_BIT |
+                   ISL_SURF_USAGE_DISABLE_AUX_BIT;
+
    if (vk_usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR ||
        vk_usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)
       isl_usage |= ISL_SURF_USAGE_VIDEO_DECODE_BIT;
@@ -665,6 +670,10 @@ add_aux_surface_if_supported(struct anv_device *device,
    if ((isl_extra_usage_flags & ISL_SURF_USAGE_DISABLE_AUX_BIT))
       return VK_SUCCESS;
 
+   /* TODO: consider whether compression with sparse is workable. */
+   if (anv_image_is_sparse(image))
+      return VK_SUCCESS;
+
    if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
       /* We don't advertise that depth buffers could be used as storage
        * images.
@@ -1180,8 +1189,8 @@ add_all_surfaces_implicit_layout(
 
       VkImageUsageFlags vk_usage = vk_image_usage(&image->vk, aspect);
       isl_surf_usage_flags_t isl_usage =
-         choose_isl_surf_usage(image->vk.create_flags, vk_usage,
-                               isl_extra_usage_flags, aspect);
+         anv_image_choose_isl_surf_usage(image->vk.create_flags, vk_usage,
+                                         isl_extra_usage_flags, aspect);
 
       result = add_primary_surface(device, image, plane, plane_format,
                                    ANV_OFFSET_IMPLICIT, plane_stride,
@@ -1387,6 +1396,63 @@ alloc_private_binding(struct anv_device *device,
    return result;
 }
 
+static void
+anv_image_finish_sparse_bindings(struct anv_image *image)
+{
+   struct anv_device *device =
+      container_of(image->vk.base.device, struct anv_device, vk);
+
+   assert(anv_image_is_sparse(image));
+
+   for (int i = 0; i < ANV_IMAGE_MEMORY_BINDING_END; i++) {
+      struct anv_image_binding *b = &image->bindings[i];
+
+      if (b->sparse_data.size != 0) {
+         assert(b->memory_range.size == b->sparse_data.size);
+         assert(b->address.offset == b->sparse_data.address);
+         anv_free_sparse_bindings(device, &b->sparse_data);
+      }
+   }
+}
+
+static VkResult MUST_CHECK
+anv_image_init_sparse_bindings(struct anv_image *image)
+{
+   struct anv_device *device =
+      container_of(image->vk.base.device, struct anv_device, vk);
+   VkResult result;
+
+   assert(anv_image_is_sparse(image));
+
+   for (int i = 0; i < ANV_IMAGE_MEMORY_BINDING_END; i++) {
+      struct anv_image_binding *b = &image->bindings[i];
+
+      if (b->memory_range.size != 0) {
+         assert(b->sparse_data.size == 0);
+
+         /* From the spec, Custom Sparse Image Block Shapes section:
+          *   "... the size in bytes of the custom sparse image block shape
+          *    will be reported in VkMemoryRequirements::alignment."
+          *
+          * ISL should have set this for us, so just assert it here.
+          */
+         assert(b->memory_range.alignment == ANV_SPARSE_BLOCK_SIZE);
+         assert(b->memory_range.size % ANV_SPARSE_BLOCK_SIZE == 0);
+
+         result = anv_init_sparse_bindings(device,
+                                           b->memory_range.size,
+                                           &b->sparse_data, 0, 0,
+                                           &b->address);
+         if (result != VK_SUCCESS) {
+            anv_image_finish_sparse_bindings(image);
+            return result;
+         }
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
 VkResult
 anv_image_init(struct anv_device *device, struct anv_image *image,
                const struct anv_image_create_info *create_info)
@@ -1502,6 +1568,12 @@ anv_image_init(struct anv_device *device, struct anv_image *image,
          can_fast_clear_with_non_zero_color(device->info, image, p, fmt_list);
    }
 
+   if (anv_image_is_sparse(image)) {
+      r = anv_image_init_sparse_bindings(image);
+      if (r != VK_SUCCESS)
+         goto fail;
+   }
+
    return VK_SUCCESS;
 
 fail:
@@ -1515,6 +1587,9 @@ anv_image_finish(struct anv_image *image)
    struct anv_device *device =
       container_of(image->vk.base.device, struct anv_device, vk);
 
+   if (anv_image_is_sparse(image))
+      anv_image_finish_sparse_bindings(image);
+
    if (image->from_gralloc) {
       assert(!image->disjoint);
       assert(image->n_planes == 1);
@@ -1549,6 +1624,18 @@ anv_image_init_from_create_info(struct anv_device *device,
                                 const VkImageCreateInfo *pCreateInfo,
                                 bool no_private_binding_alloc)
 {
+   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
+      VkResult result =
+         anv_sparse_image_check_support(device->physical,
+                                        pCreateInfo->flags,
+                                        pCreateInfo->tiling,
+                                        pCreateInfo->samples,
+                                        pCreateInfo->imageType,
+                                        pCreateInfo->format);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
    const VkNativeBufferANDROID *gralloc_info =
       vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
    if (gralloc_info)
@@ -1583,10 +1670,11 @@ VkResult anv_CreateImage(
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
 
-   if (INTEL_DEBUG(DEBUG_SPARSE) && (pCreateInfo->flags &
-         (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
-          VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
-          VK_IMAGE_CREATE_SPARSE_ALIASED_BIT)))
+   if (!device->physical->has_sparse &&
+       INTEL_DEBUG(DEBUG_SPARSE) &&
+       pCreateInfo->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
+                             VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
+                             VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
       fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__,
               __LINE__, pCreateInfo->flags);
 
@@ -1733,7 +1821,7 @@ anv_image_get_memory_requirements(struct anv_device *device,
    if (image->disjoint) {
       assert(util_bitcount(aspects) == 1);
       assert(aspects & image->vk.aspects);
-      binding = image_aspect_to_binding(image, aspects);
+      binding = anv_image_aspect_to_binding(image, aspects);
    } else {
       assert(aspects == image->vk.aspects);
       binding = &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN];
@@ -1784,10 +1872,11 @@ void anv_GetDeviceImageMemoryRequirementsKHR(
    ANV_FROM_HANDLE(anv_device, device, _device);
    struct anv_image image = { 0 };
 
-   if (INTEL_DEBUG(DEBUG_SPARSE) && (pInfo->pCreateInfo->flags &
-         (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
-          VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
-          VK_IMAGE_CREATE_SPARSE_ALIASED_BIT)))
+   if (!device->physical->has_sparse &&
+       INTEL_DEBUG(DEBUG_SPARSE) &&
+       pInfo->pCreateInfo->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
+                                    VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
+                                    VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
       fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__,
               __LINE__, pInfo->pCreateInfo->flags);
 
@@ -1800,39 +1889,158 @@ void anv_GetDeviceImageMemoryRequirementsKHR(
 
    anv_image_get_memory_requirements(device, &image, aspects,
                                      pMemoryRequirements);
+   anv_image_finish(&image);
 }
 
-void anv_GetImageSparseMemoryRequirements(
-    VkDevice                                    device,
-    VkImage                                     image,
-    uint32_t*                                   pSparseMemoryRequirementCount,
-    VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
+static void
+anv_image_get_sparse_memory_requirements(
+      struct anv_device *device,
+      struct anv_image *image,
+      VkImageAspectFlags aspects,
+      uint32_t *pSparseMemoryRequirementCount,
+      VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
 {
-   if (INTEL_DEBUG(DEBUG_SPARSE))
-      fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
-   *pSparseMemoryRequirementCount = 0;
+   VK_OUTARRAY_MAKE_TYPED(VkSparseImageMemoryRequirements2, reqs,
+                          pSparseMemoryRequirements,
+                          pSparseMemoryRequirementCount);
+
+   /* From the spec:
+    *   "The sparse image must have been created using the
+    *    VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT flag to retrieve valid sparse
+    *    image memory requirements."
+    */
+   if (!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
+      return;
+
+   VkSparseImageMemoryRequirements ds_mem_reqs = {};
+   VkSparseImageMemoryRequirements2 *ds_reqs_ptr = NULL;
+
+   u_foreach_bit(b, aspects) {
+      VkImageAspectFlagBits aspect = 1 << b;
+      const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
+      struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
+
+      VkSparseImageFormatProperties format_props =
+         anv_sparse_calc_image_format_properties(device->physical, aspect,
+                                                 image->vk.image_type, surf);
+
+      uint32_t miptail_first_lod;
+      VkDeviceSize miptail_size, miptail_offset, miptail_stride;
+      anv_sparse_calc_miptail_properties(device, image, aspect,
+                                         &miptail_first_lod, &miptail_size,
+                                         &miptail_offset, &miptail_stride);
+
+      VkSparseImageMemoryRequirements mem_reqs = {
+         .formatProperties = format_props,
+         .imageMipTailFirstLod = miptail_first_lod,
+         .imageMipTailSize = miptail_size,
+         .imageMipTailOffset = miptail_offset,
+         .imageMipTailStride = miptail_stride,
+      };
+
+      /* If both depth and stencil are the same, unify them if possible. */
+      if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT |
+                    VK_IMAGE_ASPECT_STENCIL_BIT)) {
+         if (!ds_reqs_ptr) {
+            ds_mem_reqs = mem_reqs;
+         } else if (ds_mem_reqs.formatProperties.imageGranularity.width ==
+                       mem_reqs.formatProperties.imageGranularity.width &&
+                    ds_mem_reqs.formatProperties.imageGranularity.height ==
+                       mem_reqs.formatProperties.imageGranularity.height &&
+                    ds_mem_reqs.formatProperties.imageGranularity.depth ==
+                       mem_reqs.formatProperties.imageGranularity.depth &&
+                    ds_mem_reqs.imageMipTailFirstLod ==
+                       mem_reqs.imageMipTailFirstLod &&
+                    ds_mem_reqs.imageMipTailSize ==
+                       mem_reqs.imageMipTailSize &&
+                    ds_mem_reqs.imageMipTailOffset ==
+                       mem_reqs.imageMipTailOffset &&
+                    ds_mem_reqs.imageMipTailStride ==
+                       mem_reqs.imageMipTailStride) {
+            ds_reqs_ptr->memoryRequirements.formatProperties.aspectMask |=
+               aspect;
+            continue;
+         }
+      }
+
+      vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &reqs, r) {
+         r->memoryRequirements = mem_reqs;
+         if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT |
+                       VK_IMAGE_ASPECT_STENCIL_BIT))
+            ds_reqs_ptr = r;
+      }
+   }
 }
 
 void anv_GetImageSparseMemoryRequirements2(
-    VkDevice                                    device,
+    VkDevice                                    _device,
     const VkImageSparseMemoryRequirementsInfo2* pInfo,
     uint32_t*                                   pSparseMemoryRequirementCount,
     VkSparseImageMemoryRequirements2*           pSparseMemoryRequirements)
 {
-   if (INTEL_DEBUG(DEBUG_SPARSE))
-      fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
-   *pSparseMemoryRequirementCount = 0;
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_image, image, pInfo->image);
+
+   if (!anv_sparse_residency_is_enabled(device)) {
+      if (!device->physical->has_sparse && INTEL_DEBUG(DEBUG_SPARSE))
+         fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
+
+      *pSparseMemoryRequirementCount = 0;
+      return;
+   }
+
+   anv_image_get_sparse_memory_requirements(device, image, image->vk.aspects,
+                                            pSparseMemoryRequirementCount,
+                                            pSparseMemoryRequirements);
 }
 
-void anv_GetDeviceImageSparseMemoryRequirementsKHR(
-    VkDevice                                    device,
-    const VkDeviceImageMemoryRequirements* pInfo,
+void anv_GetDeviceImageSparseMemoryRequirements(
+    VkDevice                                    _device,
+    const VkDeviceImageMemoryRequirements*      pInfo,
     uint32_t*                                   pSparseMemoryRequirementCount,
     VkSparseImageMemoryRequirements2*           pSparseMemoryRequirements)
 {
-   if (INTEL_DEBUG(DEBUG_SPARSE))
-      fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
-   *pSparseMemoryRequirementCount = 0;
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_image image = { 0 };
+
+   if (!anv_sparse_residency_is_enabled(device)) {
+      if (!device->physical->has_sparse && INTEL_DEBUG(DEBUG_SPARSE))
+         fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
+
+      *pSparseMemoryRequirementCount = 0;
+      return;
+   }
+
+   /* This function is similar to anv_GetDeviceImageMemoryRequirementsKHR, in
+    * which it actually creates an image, gets the properties and then
+    * destroys the image.
+    *
+    * We could one day refactor things to allow us to gather the properties
+    * without having to actually create the image, maybe by reworking ISL to
+    * separate creation from parameter computing.
+    */
+
+   ASSERTED VkResult result =
+      anv_image_init_from_create_info(device, &image, pInfo->pCreateInfo,
+                                      true /* no_private_binding_alloc */);
+   assert(result == VK_SUCCESS);
+
+   /* The spec says:
+    *  "planeAspect is a VkImageAspectFlagBits value specifying the aspect
+    *   corresponding to the image plane to query. This parameter is ignored
+    *   unless pCreateInfo::tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
+    *   or pCreateInfo::flags has VK_IMAGE_CREATE_DISJOINT_BIT set."
+    */
+   VkImageAspectFlags aspects =
+      (pInfo->pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT) ||
+      (pInfo->pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+         ? pInfo->planeAspect : image.vk.aspects;
+
+   anv_image_get_sparse_memory_requirements(device, &image, aspects,
+                                            pSparseMemoryRequirementCount,
+                                            pSparseMemoryRequirements);
+
+   anv_image_finish(&image);
 }
 
 VkResult anv_BindImageMemory2(
@@ -1848,6 +2056,8 @@ VkResult anv_BindImageMemory2(
       ANV_FROM_HANDLE(anv_image, image, bind_info->image);
       bool did_bind = false;
 
+      assert(!anv_image_is_sparse(image));
+
       /* Resolve will alter the image's aspects, do this first. */
       if (mem && mem->vk.ahardware_buffer)
          resolve_ahw_image(device, image, mem);
@@ -1872,7 +2082,7 @@ VkResult anv_BindImageMemory2(
                break;
 
             struct anv_image_binding *binding =
-               image_aspect_to_binding(image, plane_info->planeAspect);
+               anv_image_aspect_to_binding(image, plane_info->planeAspect);
 
             binding->address = (struct anv_address) {
                .bo = mem->bo,
index 831a1ab..bb2607f 100644 (file)
@@ -184,6 +184,7 @@ anv_shader_stage_to_nir(struct anv_device *device,
          .ray_tracing_position_fetch = rt_enabled,
          .shader_clock = true,
          .shader_viewport_index_layer = true,
+         .sparse_residency = pdevice->has_sparse,
          .stencil_export = true,
          .storage_8bit = true,
          .storage_16bit = true,
index b825ce4..8513752 100644 (file)
@@ -916,6 +916,11 @@ struct anv_physical_device {
     /** Whether the i915 driver has the ability to create VM objects */
     bool                                        has_vm_control;
 
+    /** True if we have the means to do sparse binding (e.g., a Kernel driver
+     * with a vm_bind ioctl).
+     */
+    bool                                        has_sparse;
+
     /**/
     bool                                        uses_ex_bso;
 
@@ -1648,6 +1653,14 @@ struct anv_device {
      * Command pool for companion RCS command buffer.
      */
     VkCommandPool                               companion_rcs_cmd_pool;
+
+    /* This is true if the user ever bound a sparse resource to memory. This
+     * is used for a workaround that makes every memoryBarrier flush more
+     * things than it should. Many applications request the sparse
+     * features to be enabled but don't use them, and some create sparse
+     * resources but never use them.
+     */
+    bool                                         using_sparse;
 };
 
 static inline uint32_t
@@ -2576,13 +2589,86 @@ const struct anv_descriptor_set_layout *
 anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout,
                                  uint8_t *desc_idx);
 
+struct anv_sparse_binding_data {
+   uint64_t address;
+   uint64_t size;
+
+   /* This is kept only because it's given to us by vma_alloc() and needs to
+    * be passed back to vma_free(); we have no other particular use for it.
+    */
+   struct util_vma_heap *vma_heap;
+};
+
+#define ANV_SPARSE_BLOCK_SIZE (64 * 1024)
+
+static inline bool
+anv_sparse_binding_is_enabled(struct anv_device *device)
+{
+   return device->vk.enabled_features.sparseBinding;
+}
+
+static inline bool
+anv_sparse_residency_is_enabled(struct anv_device *device)
+{
+   return device->vk.enabled_features.sparseResidencyBuffer ||
+          device->vk.enabled_features.sparseResidencyImage2D ||
+          device->vk.enabled_features.sparseResidencyImage3D ||
+          device->vk.enabled_features.sparseResidency2Samples ||
+          device->vk.enabled_features.sparseResidency4Samples ||
+          device->vk.enabled_features.sparseResidency8Samples ||
+          device->vk.enabled_features.sparseResidency16Samples ||
+          device->vk.enabled_features.sparseResidencyAliased;
+}
+
+VkResult anv_init_sparse_bindings(struct anv_device *device,
+                                  uint64_t size,
+                                  struct anv_sparse_binding_data *sparse,
+                                  enum anv_bo_alloc_flags alloc_flags,
+                                  uint64_t client_address,
+                                  struct anv_address *out_address);
+VkResult anv_free_sparse_bindings(struct anv_device *device,
+                                  struct anv_sparse_binding_data *sparse);
+VkResult anv_sparse_bind_resource_memory(struct anv_device *device,
+                                         struct anv_sparse_binding_data *data,
+                                         const VkSparseMemoryBind *bind_);
+VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
+                                      struct anv_image *image,
+                                      const VkSparseImageMemoryBind *bind);
+
+VkSparseImageFormatProperties
+anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
+                                        VkImageAspectFlags aspect,
+                                        VkImageType vk_image_type,
+                                        struct isl_surf *surf);
+void anv_sparse_calc_miptail_properties(struct anv_device *device,
+                                        struct anv_image *image,
+                                        VkImageAspectFlags vk_aspect,
+                                        uint32_t *imageMipTailFirstLod,
+                                        VkDeviceSize *imageMipTailSize,
+                                        VkDeviceSize *imageMipTailOffset,
+                                        VkDeviceSize *imageMipTailStride);
+VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
+                                        VkImageCreateFlags flags,
+                                        VkImageTiling tiling,
+                                        VkSampleCountFlagBits samples,
+                                        VkImageType type,
+                                        VkFormat format);
+
 struct anv_buffer {
    struct vk_buffer vk;
 
    /* Set when bound */
    struct anv_address address;
+
+   struct anv_sparse_binding_data sparse_data;
 };
 
+static inline bool
+anv_buffer_is_sparse(struct anv_buffer *buffer)
+{
+   return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
+}
+
 enum anv_cmd_dirty_bits {
    ANV_CMD_DIRTY_PIPELINE                            = 1 << 0,
    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 1,
@@ -4472,6 +4558,7 @@ struct anv_image {
    struct anv_image_binding {
       struct anv_image_memory_range memory_range;
       struct anv_address address;
+      struct anv_sparse_binding_data sparse_data;
    } bindings[ANV_IMAGE_MEMORY_BINDING_END];
 
    /**
@@ -4526,6 +4613,12 @@ struct anv_image {
 };
 
 static inline bool
+anv_image_is_sparse(struct anv_image *image)
+{
+   return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
+}
+
+static inline bool
 anv_image_is_externally_shared(const struct anv_image *image)
 {
    return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
@@ -4748,6 +4841,10 @@ anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer,
                                            struct anv_state state,
                                            const struct anv_image *image);
 
+struct anv_image_binding *
+anv_image_aspect_to_binding(struct anv_image *image,
+                            VkImageAspectFlags aspect);
+
 void
 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *image,
@@ -4809,6 +4906,12 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                  bool predicate);
 
+isl_surf_usage_flags_t
+anv_image_choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
+                                VkImageUsageFlags vk_usage,
+                                isl_surf_usage_flags_t isl_extra_usage,
+                                VkImageAspectFlagBits aspect);
+
 void
 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
                          struct anv_address address,
diff --git a/src/intel/vulkan/anv_sparse.c b/src/intel/vulkan/anv_sparse.c
new file mode 100644 (file)
index 0000000..6a39557
--- /dev/null
@@ -0,0 +1,670 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <anv_private.h>
+
+/* Sparse binding handling.
+ *
+ * There is one main structure passed around all over this file:
+ *
+ * - struct anv_sparse_binding_data: every resource (VkBuffer or VkImage) has
+ *   a pointer to an instance of this structure. It contains the virtual
+ *   memory address (VMA) used by the binding operations (which is different
+ *   from the VMA used by the anv_bo it's bound to) and the VMA range size. We
+ *   do not keep record of our list of bindings (which ranges were bound
+ *   to which buffers).
+ */
+
+static VkOffset3D
+vk_offset3d_px_to_el(const VkOffset3D offset_px,
+                     const struct isl_format_layout *layout)
+{
+   return (VkOffset3D) {
+      .x = offset_px.x / layout->bw,
+      .y = offset_px.y / layout->bh,
+      .z = offset_px.z / layout->bd,
+   };
+}
+
+static VkOffset3D
+vk_offset3d_el_to_px(const VkOffset3D offset_el,
+                     const struct isl_format_layout *layout)
+{
+   return (VkOffset3D) {
+      .x = offset_el.x * layout->bw,
+      .y = offset_el.y * layout->bh,
+      .z = offset_el.z * layout->bd,
+   };
+}
+
+static VkExtent3D
+vk_extent3d_px_to_el(const VkExtent3D extent_px,
+                     const struct isl_format_layout *layout)
+{
+   return (VkExtent3D) {
+      .width = extent_px.width / layout->bw,
+      .height = extent_px.height / layout->bh,
+      .depth = extent_px.depth / layout->bd,
+   };
+}
+
+static VkExtent3D
+vk_extent3d_el_to_px(const VkExtent3D extent_el,
+                     const struct isl_format_layout *layout)
+{
+   return (VkExtent3D) {
+      .width = extent_el.width * layout->bw,
+      .height = extent_el.height * layout->bh,
+      .depth = extent_el.depth * layout->bd,
+   };
+}
+
+static bool
+isl_tiling_supports_standard_block_shapes(enum isl_tiling tiling)
+{
+   return tiling == ISL_TILING_64 ||
+          tiling == ISL_TILING_ICL_Ys ||
+          tiling == ISL_TILING_SKL_Ys;
+}
+
+static VkExtent3D
+anv_sparse_get_standard_image_block_shape(enum isl_format format,
+                                          VkImageType image_type,
+                                          uint16_t texel_size)
+{
+   const struct isl_format_layout *layout = isl_format_get_layout(format);
+   VkExtent3D block_shape = { .width = 0, .height = 0, .depth = 0 };
+
+   switch (image_type) {
+   case VK_IMAGE_TYPE_1D:
+      /* 1D images don't have a standard block format. */
+      assert(false);
+      break;
+   case VK_IMAGE_TYPE_2D:
+      switch (texel_size) {
+      case 8:
+         block_shape = (VkExtent3D) { .width = 256, .height = 256, .depth = 1 };
+         break;
+      case 16:
+         block_shape = (VkExtent3D) { .width = 256, .height = 128, .depth = 1 };
+         break;
+      case 32:
+         block_shape = (VkExtent3D) { .width = 128, .height = 128, .depth = 1 };
+         break;
+      case 64:
+         block_shape = (VkExtent3D) { .width = 128, .height = 64, .depth = 1 };
+         break;
+      case 128:
+         block_shape = (VkExtent3D) { .width = 64, .height = 64, .depth = 1 };
+         break;
+      default:
+         fprintf(stderr, "unexpected texel_size %d\n", texel_size);
+         assert(false);
+      }
+      break;
+   case VK_IMAGE_TYPE_3D:
+      switch (texel_size) {
+      case 8:
+         block_shape = (VkExtent3D) { .width = 64, .height = 32, .depth = 32 };
+         break;
+      case 16:
+         block_shape = (VkExtent3D) { .width = 32, .height = 32, .depth = 32 };
+         break;
+      case 32:
+         block_shape = (VkExtent3D) { .width = 32, .height = 32, .depth = 16 };
+         break;
+      case 64:
+         block_shape = (VkExtent3D) { .width = 32, .height = 16, .depth = 16 };
+         break;
+      case 128:
+         block_shape = (VkExtent3D) { .width = 16, .height = 16, .depth = 16 };
+         break;
+      default:
+         fprintf(stderr, "unexpected texel_size %d\n", texel_size);
+         assert(false);
+      }
+      break;
+   default:
+      fprintf(stderr, "unexpected image_type %d\n", image_type);
+      assert(false);
+   }
+
+   return vk_extent3d_el_to_px(block_shape, layout);
+}
+
+VkResult
+anv_init_sparse_bindings(struct anv_device *device,
+                         uint64_t size_,
+                         struct anv_sparse_binding_data *sparse,
+                         enum anv_bo_alloc_flags alloc_flags,
+                         uint64_t client_address,
+                         struct anv_address *out_address)
+{
+   uint64_t size = align64(size_, ANV_SPARSE_BLOCK_SIZE);
+
+   sparse->address = anv_vma_alloc(device, size, ANV_SPARSE_BLOCK_SIZE,
+                                   alloc_flags,
+                                   intel_48b_address(client_address),
+                                   &sparse->vma_heap);
+   sparse->size = size;
+
+   out_address->bo = NULL;
+   out_address->offset = sparse->address;
+
+   struct anv_vm_bind bind = {
+      .bo = NULL, /* That's a NULL binding. */
+      .address = sparse->address,
+      .bo_offset = 0,
+      .size = size,
+      .op = ANV_VM_BIND,
+   };
+   int rc = device->kmd_backend->vm_bind(device, 1, &bind);
+   if (rc) {
+      anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
+      return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "failed to bind sparse buffer");
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+anv_free_sparse_bindings(struct anv_device *device,
+                         struct anv_sparse_binding_data *sparse)
+{
+   if (!sparse->address)
+      return VK_SUCCESS;
+
+   struct anv_vm_bind unbind = {
+      .bo = 0,
+      .address = sparse->address,
+      .bo_offset = 0,
+      .size = sparse->size,
+      .op = ANV_VM_UNBIND,
+   };
+   int ret = device->kmd_backend->vm_bind(device, 1, &unbind);
+   if (ret)
+      return vk_errorf(device, VK_ERROR_UNKNOWN,
+                       "failed to unbind vm for sparse resource\n");
+
+   anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
+
+   return VK_SUCCESS;
+}
+
+static VkExtent3D
+anv_sparse_calc_block_shape(struct anv_physical_device *pdevice,
+                            struct isl_surf *surf)
+{
+   const struct isl_format_layout *layout =
+      isl_format_get_layout(surf->format);
+   const int Bpb = layout->bpb / 8;
+
+   struct isl_tile_info tile_info;
+   isl_surf_get_tile_info(surf, &tile_info);
+
+   VkExtent3D block_shape_el = {
+      .width = tile_info.logical_extent_el.width,
+      .height = tile_info.logical_extent_el.height,
+      .depth = tile_info.logical_extent_el.depth,
+   };
+   VkExtent3D block_shape_px = vk_extent3d_el_to_px(block_shape_el, layout);
+
+   if (surf->tiling == ISL_TILING_LINEAR) {
+      uint32_t elements_per_row = surf->row_pitch_B /
+                                  (block_shape_el.width * Bpb);
+      uint32_t rows_per_tile = ANV_SPARSE_BLOCK_SIZE /
+                               (elements_per_row * Bpb);
+      assert(rows_per_tile * elements_per_row * Bpb == ANV_SPARSE_BLOCK_SIZE);
+
+      block_shape_px = (VkExtent3D) {
+         .width = elements_per_row * layout->bw,
+         .height = rows_per_tile * layout->bh,
+         .depth = layout->bd,
+      };
+   }
+
+   return block_shape_px;
+}
+
+VkSparseImageFormatProperties
+anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
+                                        VkImageAspectFlags aspect,
+                                        VkImageType vk_image_type,
+                                        struct isl_surf *surf)
+{
+   const struct isl_format_layout *isl_layout =
+      isl_format_get_layout(surf->format);
+   const int bpb = isl_layout->bpb;
+   assert(bpb == 8 || bpb == 16 || bpb == 32 || bpb == 64 ||bpb == 128);
+   const int Bpb = bpb / 8;
+
+   VkExtent3D granularity = anv_sparse_calc_block_shape(pdevice, surf);
+   bool is_standard = false;
+   bool is_known_nonstandard_format = false;
+
+   if (vk_image_type != VK_IMAGE_TYPE_1D) {
+      VkExtent3D std_shape =
+         anv_sparse_get_standard_image_block_shape(surf->format, vk_image_type,
+                                                   bpb);
+      /* YUV formats don't work with Tile64, which is required if we want to
+       * claim standard block shapes. The spec requires us to support all
+       * non-compressed color formats that non-sparse supports, so we can't
+       * just say YUV formats are not supported by Sparse. So we end
+       * supporting this format and anv_sparse_calc_miptail_properties() will
+       * say that everything is part of the miptail.
+       *
+       * For more details on the hardware restriction, please check
+       * isl_gfx125_filter_tiling().
+       */
+      if (pdevice->info.verx10 >= 125 && isl_format_is_yuv(surf->format))
+         is_known_nonstandard_format = true;
+
+      is_standard = granularity.width == std_shape.width &&
+                    granularity.height == std_shape.height &&
+                    granularity.depth == std_shape.depth;
+
+      assert(is_standard || is_known_nonstandard_format);
+   }
+
+   uint32_t block_size = granularity.width * granularity.height *
+                         granularity.depth * Bpb;
+   bool wrong_block_size = block_size != ANV_SPARSE_BLOCK_SIZE;
+
+   return (VkSparseImageFormatProperties) {
+      .aspectMask = aspect,
+      .imageGranularity = granularity,
+      .flags = ((is_standard || is_known_nonstandard_format) ? 0 :
+                  VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT) |
+               (wrong_block_size ? VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT :
+                  0),
+   };
+}
+
+/* The miptail is supposed to be this region where the tiniest mip levels
+ * are squished together in one single page, which should save us some memory.
+ * It's a hardware feature which our hardware supports on certain tiling
+ * formats - the ones we always want to use for sparse resources.
+ *
+ * For sparse, the main feature of the miptail is that it only supports opaque
+ * binds, so you either bind the whole miptail or you bind nothing at all,
+ * there are no subresources inside it to separately bind. While the idea is
+ * that the miptail as reported by sparse should match what our hardware does,
+ * in practice we can say in our sparse functions that certain mip levels are
+ * part of the miptail while from the point of view of our hardware they
+ * aren't.
+ *
+ * If we detect we're using the sparse-friendly tiling formats and ISL
+ * supports miptails for them, we can just trust the miptail level set by ISL
+ * and things can proceed as The Spec intended.
+ *
+ * However, if that's not the case, we have to go on a best-effort policy. We
+ * could simply declare that every mip level is part of the miptail and be
+ * done, but since that kinda defeats the purpose of Sparse we try to find
+ * what level we really should be reporting as the first miptail level based
+ * on the alignments of the surface subresources.
+ */
+void
+anv_sparse_calc_miptail_properties(struct anv_device *device,
+                                   struct anv_image *image,
+                                   VkImageAspectFlags vk_aspect,
+                                   uint32_t *imageMipTailFirstLod,
+                                   VkDeviceSize *imageMipTailSize,
+                                   VkDeviceSize *imageMipTailOffset,
+                                   VkDeviceSize *imageMipTailStride)
+{
+   assert(__builtin_popcount(vk_aspect) == 1);
+   const uint32_t plane = anv_image_aspect_to_plane(image, vk_aspect);
+   struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
+   uint64_t binding_plane_offset =
+      image->planes[plane].primary_surface.memory_range.offset;
+   const struct isl_format_layout *isl_layout =
+      isl_format_get_layout(surf->format);
+   const int Bpb = isl_layout->bpb / 8;
+   struct isl_tile_info tile_info;
+   isl_surf_get_tile_info(surf, &tile_info);
+   uint32_t tile_size = tile_info.logical_extent_el.width * Bpb *
+                        tile_info.logical_extent_el.height *
+                        tile_info.logical_extent_el.depth;
+
+   uint64_t layer1_offset;
+   uint32_t x_off, y_off;
+
+   /* Treat the whole thing as a single miptail. We should have already
+    * reported this image as VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT.
+    *
+    * In theory we could try to make ISL massage the alignments so that we
+    * could at least claim mip level 0 to be not part of the miptail, but
+    * that could end up wasting a lot of memory, so it's better to do
+    * nothing and focus our efforts into making things use the appropriate
+    * tiling formats that give us the standard block shapes.
+    */
+   if (tile_size != ANV_SPARSE_BLOCK_SIZE)
+      goto out_everything_is_miptail;
+
+   assert(surf->tiling != ISL_TILING_LINEAR);
+
+   if (image->vk.array_layers == 1) {
+      layer1_offset = surf->size_B;
+   } else {
+      isl_surf_get_image_offset_B_tile_sa(surf, 0, 1, 0, &layer1_offset,
+                                          &x_off, &y_off);
+      if (x_off || y_off)
+         goto out_everything_is_miptail;
+   }
+   assert(layer1_offset % tile_size == 0);
+
+   /* We could try to do better here, but there's not really any point since
+    * we should be supporting the appropriate tiling formats everywhere.
+    */
+   if (!isl_tiling_supports_standard_block_shapes(surf->tiling))
+      goto out_everything_is_miptail;
+
+   int miptail_first_level = surf->miptail_start_level;
+   if (miptail_first_level >= image->vk.mip_levels)
+      goto out_no_miptail;
+
+   uint64_t miptail_offset = 0;
+   isl_surf_get_image_offset_B_tile_sa(surf, miptail_first_level, 0, 0,
+                                       &miptail_offset,
+                                       &x_off, &y_off);
+   assert(x_off == 0 && y_off == 0);
+   assert(miptail_offset % tile_size == 0);
+
+   *imageMipTailFirstLod = miptail_first_level;
+   *imageMipTailSize = tile_size;
+   *imageMipTailOffset = binding_plane_offset + miptail_offset;
+   *imageMipTailStride = layer1_offset;
+   return;
+
+out_no_miptail:
+   *imageMipTailFirstLod = image->vk.mip_levels;
+   *imageMipTailSize = 0;
+   *imageMipTailOffset = 0;
+   *imageMipTailStride = 0;
+   return;
+
+out_everything_is_miptail:
+   *imageMipTailFirstLod = 0;
+   *imageMipTailSize = surf->size_B;
+   *imageMipTailOffset = binding_plane_offset;
+   *imageMipTailStride = 0;
+   return;
+}
+
+/* Translate a VkSparseMemoryBind into the anv_vm_bind we hand to the KMD
+ * backend.
+ *
+ * A VK_NULL_HANDLE memory handle leaves bo as NULL (a NULL binding), which
+ * is how sparse ranges get unbound; otherwise bo/bo_offset point into the
+ * backing anv_device_memory's BO.
+ */
+static struct anv_vm_bind
+vk_bind_to_anv_vm_bind(struct anv_sparse_binding_data *sparse,
+                       const struct VkSparseMemoryBind *vk_bind)
+{
+   struct anv_vm_bind anv_bind = {
+      .bo = NULL,
+      .address = sparse->address + vk_bind->resourceOffset,
+      .bo_offset = 0,
+      .size = vk_bind->size,
+      .op = ANV_VM_BIND,
+   };
+
+   assert(vk_bind->size);
+   assert(vk_bind->resourceOffset + vk_bind->size <= sparse->size);
+
+   if (vk_bind->memory != VK_NULL_HANDLE) {
+      anv_bind.bo = anv_device_memory_from_handle(vk_bind->memory)->bo;
+      /* Was `= vk_bind->memoryOffset,` — a comma that fused this assignment
+       * with the assert below into one comma-expression. Harmless by luck,
+       * but clearly a typo.
+       */
+      anv_bind.bo_offset = vk_bind->memoryOffset;
+      assert(vk_bind->memoryOffset + vk_bind->size <= anv_bind.bo->size);
+   }
+
+   return anv_bind;
+}
+
+/* Bind (or, for VK_NULL_HANDLE memory, unbind) one range of a sparse
+ * resource's virtual address space through the KMD backend.
+ */
+VkResult
+anv_sparse_bind_resource_memory(struct anv_device *device,
+                                struct anv_sparse_binding_data *sparse,
+                                const VkSparseMemoryBind *vk_bind)
+{
+   struct anv_vm_bind vm_bind = vk_bind_to_anv_vm_bind(sparse, vk_bind);
+   int ret = device->kmd_backend->vm_bind(device, 1, &vm_bind);
+
+   if (ret == 0)
+      return VK_SUCCESS;
+
+   return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                    "failed to bind sparse buffer");
+}
+
+/* Implement vkQueueBindSparse's VkSparseImageMemoryBind for one bind: map
+ * the pixel-space rectangle described by `bind` to VMA offsets inside the
+ * image's sparse binding and issue one vm_bind per contiguous line of
+ * sparse blocks.
+ *
+ * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY if the KMD backend rejects a bind.
+ */
+VkResult
+anv_sparse_bind_image_memory(struct anv_queue *queue,
+                             struct anv_image *image,
+                             const VkSparseImageMemoryBind *bind)
+{
+   struct anv_device *device = queue->device;
+   VkImageAspectFlags aspect = bind->subresource.aspectMask;
+   uint32_t mip_level = bind->subresource.mipLevel;
+   uint32_t array_layer = bind->subresource.arrayLayer;
+
+   /* The spec requires exactly one aspect per bind, and we never advertise
+    * metadata aspects.
+    */
+   assert(__builtin_popcount(aspect) == 1);
+   assert(!(bind->flags & VK_SPARSE_MEMORY_BIND_METADATA_BIT));
+
+   struct anv_image_binding *img_binding = image->disjoint ?
+      anv_image_aspect_to_binding(image, aspect) :
+      &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN];
+   struct anv_sparse_binding_data *sparse_data = &img_binding->sparse_data;
+
+   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
+   struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
+   uint64_t binding_plane_offset =
+      image->planes[plane].primary_surface.memory_range.offset;
+   const struct isl_format_layout *layout =
+      isl_format_get_layout(surf->format);
+
+   VkExtent3D block_shape_px =
+      anv_sparse_calc_block_shape(device->physical, surf);
+   VkExtent3D block_shape_el = vk_extent3d_px_to_el(block_shape_px, layout);
+
+   /* Both bind->offset and bind->extent are in pixel units. */
+   VkOffset3D bind_offset_el = vk_offset3d_px_to_el(bind->offset, layout);
+
+   /* The spec says we only really need to align if for a given coordinate
+    * offset + extent equals the corresponding dimensions of the image
+    * subresource, but all the other non-aligned usage is invalid, so just
+    * align everything.
+    */
+   VkExtent3D bind_extent_px = {
+      .width = ALIGN_NPOT(bind->extent.width, block_shape_px.width),
+      .height = ALIGN_NPOT(bind->extent.height, block_shape_px.height),
+      .depth = ALIGN_NPOT(bind->extent.depth, block_shape_px.depth),
+   };
+   VkExtent3D bind_extent_el = vk_extent3d_px_to_el(bind_extent_px, layout);
+
+   /* A sparse block should correspond to our tile size, so this has to be
+    * either 4k or 64k depending on the tiling format. */
+   const uint64_t block_size_B = block_shape_el.width * (layout->bpb / 8) *
+                                 block_shape_el.height *
+                                 block_shape_el.depth;
+   /* How many blocks are necessary to form a whole line on this image? */
+   const uint32_t blocks_per_line = surf->row_pitch_B / (layout->bpb / 8) /
+                                    block_shape_el.width;
+   /* The loop below will try to bind a whole line of blocks at a time as
+    * they're guaranteed to be contiguous, so we calculate how many blocks
+    * that is and how big is each block to figure the bind size of a whole
+    * line.
+    *
+    * TODO: if we're binding mip_level 0 and bind_extent_el.width is the total
+    * line, the whole rectangle is contiguous so we could do this with a
+    * single bind instead of per-line. We should figure out how common this is
+    * and consider implementing this special-case.
+    */
+   uint64_t line_bind_size_in_blocks = bind_extent_el.width /
+                                       block_shape_el.width;
+   uint64_t line_bind_size = line_bind_size_in_blocks * block_size_B;
+   assert(line_bind_size_in_blocks != 0);
+   assert(line_bind_size != 0);
+
+   uint64_t memory_offset = bind->memoryOffset;
+   for (uint32_t z = bind_offset_el.z;
+        z < bind_offset_el.z + bind_extent_el.depth;
+        z += block_shape_el.depth) {
+      uint64_t subresource_offset_B;
+      uint32_t subresource_x_offset, subresource_y_offset;
+      isl_surf_get_image_offset_B_tile_sa(surf, mip_level, array_layer, z,
+                                          &subresource_offset_B,
+                                          &subresource_x_offset,
+                                          &subresource_y_offset);
+      assert(subresource_x_offset == 0 && subresource_y_offset == 0);
+      assert(subresource_offset_B % block_size_B == 0);
+
+      for (uint32_t y = bind_offset_el.y;
+           y < bind_offset_el.y + bind_extent_el.height;
+           y += block_shape_el.height) {
+         uint32_t line_block_offset = y / block_shape_el.height *
+                                      blocks_per_line;
+         uint64_t line_start_B = subresource_offset_B +
+                                 line_block_offset * block_size_B;
+         uint64_t bind_offset_B = line_start_B +
+                                  (bind_offset_el.x / block_shape_el.width) *
+                                  block_size_B;
+
+         VkSparseMemoryBind opaque_bind = {
+            .resourceOffset = binding_plane_offset + bind_offset_B,
+            .size = line_bind_size,
+            .memory = bind->memory,
+            .memoryOffset = memory_offset,
+            .flags = bind->flags,
+         };
+
+         memory_offset += line_bind_size;
+
+         assert(line_start_B % block_size_B == 0);
+         assert(opaque_bind.resourceOffset % block_size_B == 0);
+         assert(opaque_bind.size % block_size_B == 0);
+
+         /* Renamed from `bind` to avoid shadowing the parameter. */
+         struct anv_vm_bind vm_bind = vk_bind_to_anv_vm_bind(sparse_data,
+                                                             &opaque_bind);
+         int rc = device->kmd_backend->vm_bind(device, 1, &vm_bind);
+         if (rc) {
+            return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                             "failed to bind sparse image");
+         }
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Decide whether a sparse image with the given creation parameters can be
+ * supported. Returns VK_SUCCESS, or the VkResult error callers should
+ * propagate (VK_ERROR_FORMAT_NOT_SUPPORTED / VK_ERROR_FEATURE_NOT_PRESENT).
+ */
+VkResult
+anv_sparse_image_check_support(struct anv_physical_device *pdevice,
+                               VkImageCreateFlags flags,
+                               VkImageTiling tiling,
+                               VkSampleCountFlagBits samples,
+                               VkImageType type,
+                               VkFormat vk_format)
+{
+   assert(flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT);
+
+   /* The spec says:
+    *   "A sparse image created using VK_IMAGE_CREATE_SPARSE_BINDING_BIT (but
+    *    not VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) supports all formats that
+    *    non-sparse usage supports, and supports both VK_IMAGE_TILING_OPTIMAL
+    *    and VK_IMAGE_TILING_LINEAR tiling."
+    *
+    * So binding-only images impose no extra restrictions.
+    */
+   if (!(flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
+      return VK_SUCCESS;
+
+   /* From here on, these are the rules:
+    *   "A sparse image created using VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT
+    *    supports all non-compressed color formats with power-of-two element
+    *    size that non-sparse usage supports. Additional formats may also be
+    *    supported and can be queried via
+    *    vkGetPhysicalDeviceSparseImageFormatProperties.
+    *    VK_IMAGE_TILING_LINEAR tiling is not supported."
+    */
+
+   /* While the spec itself says linear is not supported (see above), deqp-vk
+    * tries anyway to create linear sparse images, so we have to check for it.
+    * This is also said in VUID-VkImageCreateInfo-tiling-04121:
+    *   "If tiling is VK_IMAGE_TILING_LINEAR, flags must not contain
+    *    VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT"
+    */
+   if (tiling == VK_IMAGE_TILING_LINEAR)
+      return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+   /* TODO: multisampled sparse residency is not supported yet. */
+   if (samples != VK_SAMPLE_COUNT_1_BIT)
+      return VK_ERROR_FEATURE_NOT_PRESENT;
+
+   /* While the Vulkan spec allows us to support depth/stencil sparse images
+    * everywhere, sometimes we're not able to have them with the tiling
+    * formats that give us the standard block shapes. Having standard block
+    * shapes is higher priority than supporting depth/stencil sparse images.
+    *
+    * Please see ISL's filter_tiling() functions for accurate explanations on
+    * why depth/stencil images are not always supported with the tiling
+    * formats we want. But in short: depth/stencil support in our HW is
+    * limited to 2D and we can't build a 2D view of a 3D image with these
+    * tiling formats due to the address swizzling being different.
+    */
+   VkImageAspectFlags aspects = vk_format_aspects(vk_format);
+   if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+      /* For 125+, isl_gfx125_filter_tiling() claims 3D is not supported.
+       * For the previous platforms, isl_gfx6_filter_tiling() says only 2D is
+       * supported.
+       */
+      const bool type_ok = pdevice->info.verx10 >= 125 ?
+                           type != VK_IMAGE_TYPE_3D :
+                           type == VK_IMAGE_TYPE_2D;
+      if (!type_ok)
+         return VK_ERROR_FORMAT_NOT_SUPPORTED;
+   }
+
+   const struct anv_format *anv_format = anv_get_format(vk_format);
+   if (!anv_format)
+      return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+   for (int plane = 0; plane < anv_format->n_planes; plane++) {
+      enum isl_format isl_format = anv_format->planes[plane].isl_format;
+
+      if (isl_format == ISL_FORMAT_UNSUPPORTED)
+         return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+      /* As quoted above, we only need to support the power-of-two formats.
+       * The problem with the non-power-of-two formats is that we need an
+       * integer number of pixels to fit into a sparse block, so we'd need the
+       * sparse block sizes to be, for example, 192k for 24bpp.
+       *
+       * TODO: add support for these formats.
+       */
+      switch (isl_format_get_layout(isl_format)->bpb) {
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+      case 128:
+         break;
+      default:
+         return VK_ERROR_FORMAT_NOT_SUPPORTED;
+      }
+   }
+
+   return VK_SUCCESS;
+}
index 92f59ba..417d0ed 100644 (file)
@@ -3845,11 +3845,35 @@ mask_is_shader_write(const VkAccessFlags2 access)
                      VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT));
 }
 
+static inline bool
+mask_is_write(const VkAccessFlags2 access)
+{
+   return access & (VK_ACCESS_2_SHADER_WRITE_BIT |
+                    VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
+                    VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                    VK_ACCESS_2_TRANSFER_WRITE_BIT |
+                    VK_ACCESS_2_HOST_WRITE_BIT |
+                    VK_ACCESS_2_MEMORY_WRITE_BIT |
+                    VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT |
+                    VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR |
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+                    VK_ACCESS_2_VIDEO_ENCODE_WRITE_BIT_KHR |
+#endif
+                    VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
+                    VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT |
+                    VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV |
+                    VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR |
+                    VK_ACCESS_2_MICROMAP_WRITE_BIT_EXT |
+                    VK_ACCESS_2_OPTICAL_FLOW_WRITE_BIT_NV);
+}
+
 static void
 cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
                    const VkDependencyInfo *dep_info,
                    const char *reason)
 {
+   struct anv_device *device = cmd_buffer->device;
+
    /* XXX: Right now, we're really dumb and just flush whatever categories
     * the app asks for.  One of these days we may make this a bit better
     * but right now that's all the hardware allows for in most areas.
@@ -3857,6 +3881,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
    VkAccessFlags2 src_flags = 0;
    VkAccessFlags2 dst_flags = 0;
 
+   bool apply_sparse_flushes = false;
+
    if (anv_cmd_buffer_is_video_queue(cmd_buffer))
       return;
 
@@ -3873,21 +3899,34 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
          cmd_buffer->state.queries.buffer_write_bits |=
             ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
       }
+
+      /* There's no way of knowing if this memory barrier is related to sparse
+       * buffers! This is pretty horrible.
+       */
+      if (device->using_sparse && mask_is_write(src_flags))
+         apply_sparse_flushes = true;
    }
 
    for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
-      src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
-      dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
+      const VkBufferMemoryBarrier2 *buf_barrier =
+         &dep_info->pBufferMemoryBarriers[i];
+      ANV_FROM_HANDLE(anv_buffer, buffer, buf_barrier->buffer);
+
+      src_flags |= buf_barrier->srcAccessMask;
+      dst_flags |= buf_barrier->dstAccessMask;
 
       /* Shader writes to buffers that could then be written by a transfer
        * command (including queries).
        */
-      if (stage_is_shader(dep_info->pBufferMemoryBarriers[i].srcStageMask) &&
-          mask_is_shader_write(dep_info->pBufferMemoryBarriers[i].srcAccessMask) &&
-          stage_is_transfer(dep_info->pBufferMemoryBarriers[i].dstStageMask)) {
+      if (stage_is_shader(buf_barrier->srcStageMask) &&
+          mask_is_shader_write(buf_barrier->srcAccessMask) &&
+          stage_is_transfer(buf_barrier->dstStageMask)) {
          cmd_buffer->state.queries.buffer_write_bits |=
             ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
       }
+
+      if (anv_buffer_is_sparse(buffer) && mask_is_write(src_flags))
+         apply_sparse_flushes = true;
    }
 
    for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
@@ -3951,7 +3990,7 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
       anv_foreach_image_aspect_bit(aspect_bit, image, aspects) {
          VkImageAspectFlagBits aspect = 1UL << aspect_bit;
          if (anv_layout_has_untracked_aux_writes(
-                cmd_buffer->device->info,
+                device->info,
                 image, aspect,
                 img_barrier->newLayout,
                 cmd_buffer->queue_family->queueFlags)) {
@@ -3963,11 +4002,24 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
             }
          }
       }
+
+      if (anv_image_is_sparse(image) && mask_is_write(src_flags))
+         apply_sparse_flushes = true;
    }
 
    enum anv_pipe_bits bits =
-      anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
-      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
+      anv_pipe_flush_bits_for_access_flags(device, src_flags) |
+      anv_pipe_invalidate_bits_for_access_flags(device, dst_flags);
+
+   /* Our HW implementation of the sparse feature lives in the GAM unit
+    * (interface between all the GPU caches and external memory). As a result
+    * writes to NULL bound images & buffers that should be ignored are
+    * actually still visible in the caches. The only way for us to get correct
+    * NULL bound regions to return 0s is to evict the caches to force the
+    * caches to be repopulated with 0s.
+    */
+   if (apply_sparse_flushes)
+      bits |= ANV_PIPE_FLUSH_BITS;
 
    if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
       genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
index dc47ad5..60058c8 100644 (file)
@@ -182,6 +182,7 @@ libanv_files = files(
   'anv_pipeline_cache.c',
   'anv_private.h',
   'anv_queue.c',
+  'anv_sparse.c',
   'anv_util.c',
   'anv_utrace.c',
   'anv_va.c',