dzn: Add initial bindless infrastructure
author Jesse Natalie <jenatali@microsoft.com>
Thu, 2 Mar 2023 18:06:13 +0000 (10:06 -0800)
committer Marge Bot <emma+marge@anholt.net>
Thu, 23 Mar 2023 21:48:44 +0000 (21:48 +0000)
When operating in "bindless" mode, the device will own 2 descriptor
heaps, one for views, and one for samplers. Every time a view is
created (image view, buffer view), a slot is allocated for it out
of the device view heap for each usage type (sampled vs storage).

Then, in a future change, descriptor sets will just contain view/
sampler indices instead of actual descriptors. Instead of copying
these to a cmdbuf-owned descriptor heap, we can directly bind the
descriptor set as a buffer. We'll also modify shaders to perform
an indirection and index into the device heap.

Buffers also get views set up on creation. In a perfect world, we
could just put addresses/sizes in the descriptor set, but DXIL
doesn't support loading from addresses, so we need descriptors. When
robust buffer access is disabled *or* descriptor set buffer views
reference the remainder of the buffer, we can just re-use a view
from the buffer and use an offset.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21913>

src/microsoft/vulkan/dzn_descriptor_set.c
src/microsoft/vulkan/dzn_device.c
src/microsoft/vulkan/dzn_image.c
src/microsoft/vulkan/dzn_private.h

index 4d2c726..95831ec 100644 (file)
@@ -815,14 +815,14 @@ desc_type_to_heap_type(VkDescriptorType in)
    }
 }
 
-static void
+void
 dzn_descriptor_heap_finish(struct dzn_descriptor_heap *heap)
 {
    if (heap->heap)
       ID3D12DescriptorHeap_Release(heap->heap);
 }
 
-static VkResult
+VkResult
 dzn_descriptor_heap_init(struct dzn_descriptor_heap *heap,
                          struct dzn_device *device,
                          D3D12_DESCRIPTOR_HEAP_TYPE type,
@@ -874,7 +874,7 @@ dzn_descriptor_heap_get_gpu_handle(const struct dzn_descriptor_heap *heap, uint3
    };
 }
 
-static void
+void
 dzn_descriptor_heap_write_sampler_desc(struct dzn_device *device,
                                        struct dzn_descriptor_heap *heap,
                                        uint32_t desc_offset,
@@ -933,7 +933,7 @@ dzn_descriptor_heap_write_image_view_desc(struct dzn_device *device,
    }
 }
 
-static void
+void
 dzn_descriptor_heap_write_buffer_view_desc(struct dzn_device *device,
                                            struct dzn_descriptor_heap *heap,
                                            uint32_t desc_offset,
@@ -969,7 +969,7 @@ dzn_descriptor_heap_write_buffer_desc(struct dzn_device *device,
       assert(!writeable);
       D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {
          .BufferLocation = info->buffer->gpuva + info->offset,
-         .SizeInBytes = ALIGN_POT(size, 256),
+         .SizeInBytes = MIN2(ALIGN_POT(size, 256), D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4 * sizeof(float)),
       };
       ID3D12Device1_CreateConstantBufferView(device->dev, &cbv_desc, view_handle);
    } else if (writeable) {
index 1d9afe3..781e656 100644 (file)
@@ -2168,6 +2168,12 @@ dzn_device_destroy(struct dzn_device *device, const VkAllocationCallbacks *pAllo
    dzn_device_query_finish(device);
    dzn_meta_finish(device);
 
+   dzn_foreach_pool_type(type) {
+      dzn_descriptor_heap_finish(&device->device_heaps[type].heap);
+      util_dynarray_fini(&device->device_heaps[type].slot_freelist);
+      mtx_destroy(&device->device_heaps[type].lock);
+   }
+
    if (device->dev_config)
       ID3D12DeviceConfiguration_Release(device->dev_config);
 
@@ -2357,6 +2363,23 @@ dzn_device_create(struct dzn_physical_device *pdev,
       device->need_swapchain_blits = true;
    }
 
+   if (device->bindless) {
+      dzn_foreach_pool_type(type) {
+         uint32_t descriptor_count = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ?
+            D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE :
+            D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1;
+         result = dzn_descriptor_heap_init(&device->device_heaps[type].heap, device, type, descriptor_count, true);
+         if (result != VK_SUCCESS) {
+            dzn_device_destroy(device, pAllocator);
+            return result;
+         }
+
+         mtx_init(&device->device_heaps[type].lock, mtx_plain);
+         util_dynarray_init(&device->device_heaps[type].slot_freelist, NULL);
+         device->device_heaps[type].next_alloc_slot = 0;
+      }
+   }
+
    assert(queue_count == qindex);
    *out = dzn_device_to_handle(device);
    return VK_SUCCESS;
@@ -2705,6 +2728,9 @@ dzn_buffer_destroy(struct dzn_buffer *buf, const VkAllocationCallbacks *pAllocat
    if (buf->res)
       ID3D12Resource_Release(buf->res);
 
+   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->cbv_bindless_slot);
+   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->uav_bindless_slot);
+
    vk_object_base_finish(&buf->base);
    vk_free2(&device->vk.alloc, pAllocator, buf);
 }
@@ -2762,6 +2788,24 @@ dzn_buffer_create(struct dzn_device *device,
       buf->valid_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
    }
 
+   buf->cbv_bindless_slot = buf->uav_bindless_slot = -1;
+   if (device->bindless) {
+      if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
+         buf->cbv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+         if (buf->cbv_bindless_slot < 0) {
+            dzn_buffer_destroy(buf, pAllocator);
+            return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         }
+      }
+      if (buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
+         buf->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+         if (buf->uav_bindless_slot < 0) {
+            dzn_buffer_destroy(buf, pAllocator);
+            return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         }
+      }
+   }
+
    *out = dzn_buffer_to_handle(buf);
    return VK_SUCCESS;
 }
@@ -2958,6 +3002,30 @@ dzn_BindBufferMemory2(VkDevice _device,
          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
       buffer->gpuva = ID3D12Resource_GetGPUVirtualAddress(buffer->res);
+
+      if (device->bindless) {
+         struct dzn_buffer_desc buf_desc = {
+            .buffer = buffer,
+            .offset = 0,
+            .range = VK_WHOLE_SIZE,
+         };
+         if (buffer->cbv_bindless_slot >= 0) {
+            buf_desc.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+            dzn_descriptor_heap_write_buffer_desc(device,
+                                                  &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
+                                                  buffer->cbv_bindless_slot,
+                                                  false,
+                                                  &buf_desc);
+         }
+         if (buffer->uav_bindless_slot >= 0) {
+            buf_desc.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+            dzn_descriptor_heap_write_buffer_desc(device,
+                                                  &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
+                                                  buffer->uav_bindless_slot,
+                                                  true,
+                                                  &buf_desc);
+         }
+      }
    }
 
    return VK_SUCCESS;
@@ -3103,6 +3171,8 @@ dzn_sampler_destroy(struct dzn_sampler *sampler,
    struct dzn_device *device =
       container_of(sampler->base.device, struct dzn_device, vk);
 
+   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sampler->bindless_slot);
+
    vk_object_base_finish(&sampler->base);
    vk_free2(&device->vk.alloc, pAllocator, sampler);
 }
@@ -3203,6 +3273,20 @@ dzn_sampler_create(struct dzn_device *device,
       sampler->desc.Flags |= D3D12_SAMPLER_FLAG_NON_NORMALIZED_COORDINATES;
 #endif
 
+   sampler->bindless_slot = -1;
+   if (device->bindless) {
+      sampler->bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+      if (sampler->bindless_slot < 0) {
+         dzn_sampler_destroy(sampler, pAllocator);
+         return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      }
+
+      dzn_descriptor_heap_write_sampler_desc(device,
+                                             &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heap,
+                                             sampler->bindless_slot,
+                                             sampler);
+   }
+
    *out = dzn_sampler_to_handle(sampler);
    return VK_SUCCESS;
 }
@@ -3225,6 +3309,39 @@ dzn_DestroySampler(VkDevice device,
    dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator);
 }
 
+int
+dzn_device_descriptor_heap_alloc_slot(struct dzn_device *device,
+                                      D3D12_DESCRIPTOR_HEAP_TYPE type)
+{ /* Reserve one slot in the device-level heap of the given type; returns the slot index, or -1 if none is left. */
+   struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];
+   mtx_lock(&heap->lock); /* guards slot_freelist and next_alloc_slot */
+   /* Recycle a previously freed slot first; otherwise hand out the next never-used index. */
+   int ret = -1; /* stays -1 when the freelist is empty and all desc_count slots were handed out */
+   if (heap->slot_freelist.size)
+      ret = util_dynarray_pop(&heap->slot_freelist, int);
+   else if (heap->next_alloc_slot < heap->heap.desc_count)
+      ret = heap->next_alloc_slot++;
+   /* Callers in this change report a negative result as VK_ERROR_OUT_OF_DEVICE_MEMORY. */
+   mtx_unlock(&heap->lock);
+   return ret;
+}
+
+void
+dzn_device_descriptor_heap_free_slot(struct dzn_device *device,
+                                     D3D12_DESCRIPTOR_HEAP_TYPE type,
+                                     int slot)
+{ /* Return a slot obtained from dzn_device_descriptor_heap_alloc_slot() to the freelist of the heap of the given type. */
+   struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];
+   assert(slot < 0 || slot < heap->heap.desc_count);
+   /* Negative means "never allocated" (objects initialize their slots to -1), so destroy paths can call this unconditionally. */
+   if (slot < 0)
+      return;
+   /* Only the index is recorded for reuse; the descriptor stored in the heap is not touched here. */
+   mtx_lock(&heap->lock);
+   util_dynarray_append(&heap->slot_freelist, int, slot);
+   mtx_unlock(&heap->lock);
+}
+
 VKAPI_ATTR void VKAPI_CALL
 dzn_GetDeviceGroupPeerMemoryFeatures(VkDevice device,
                                      uint32_t heapIndex,
index 00c93c0..dfcbf05 100644 (file)
@@ -1362,6 +1362,9 @@ dzn_image_view_destroy(struct dzn_image_view *iview,
 
    struct dzn_device *device = container_of(iview->vk.base.device, struct dzn_device, vk);
 
+   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, iview->srv_bindless_slot);
+   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, iview->uav_bindless_slot);
+
    vk_image_view_finish(&iview->vk);
    vk_free2(&device->vk.alloc, pAllocator, iview);
 }
@@ -1372,6 +1375,7 @@ dzn_image_view_create(struct dzn_device *device,
                       const VkAllocationCallbacks *pAllocator,
                       VkImageView *out)
 {
+   VK_FROM_HANDLE(dzn_image, image, pCreateInfo->image);
    struct dzn_image_view *iview =
       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*iview), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -1380,6 +1384,36 @@ dzn_image_view_create(struct dzn_device *device,
 
    dzn_image_view_init(device, iview, pCreateInfo);
 
+   iview->srv_bindless_slot = iview->uav_bindless_slot = -1;
+   if (device->bindless) {
+      if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) {
+         iview->srv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+         if (iview->srv_bindless_slot < 0) {
+            dzn_image_view_destroy(iview, pAllocator);
+            return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         }
+
+         dzn_descriptor_heap_write_image_view_desc(device,
+                                                   &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
+                                                   iview->srv_bindless_slot,
+                                                   false, false,
+                                                   iview);
+      }
+      if (iview->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+         iview->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+         if (iview->uav_bindless_slot < 0) {
+            dzn_image_view_destroy(iview, pAllocator);
+            return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         }
+
+         dzn_descriptor_heap_write_image_view_desc(device,
+                                                   &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
+                                                   iview->uav_bindless_slot,
+                                                   true, true,
+                                                   iview);
+      }
+   }
+
    *out = dzn_image_view_to_handle(iview);
    return VK_SUCCESS;
 }
@@ -1411,6 +1445,9 @@ dzn_buffer_view_destroy(struct dzn_buffer_view *bview,
 
    struct dzn_device *device = container_of(bview->base.device, struct dzn_device, vk);
 
+   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, bview->srv_bindless_slot);
+   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, bview->uav_bindless_slot);
+
    vk_object_base_finish(&bview->base);
    vk_free2(&device->vk.alloc, pAllocator, bview);
 }
@@ -1438,6 +1475,7 @@ dzn_buffer_view_create(struct dzn_device *device,
       buf->size - pCreateInfo->offset : pCreateInfo->range;
 
    bview->buffer = buf;
+   bview->srv_bindless_slot = bview->uav_bindless_slot = -1;
    if (buf->usage &
        (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
         VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) {
@@ -1452,6 +1490,16 @@ dzn_buffer_view_create(struct dzn_device *device,
             .Flags = D3D12_BUFFER_SRV_FLAG_NONE,
          },
       };
+
+      if (device->bindless) {
+         bview->srv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+         if (bview->srv_bindless_slot < 0) {
+            dzn_buffer_view_destroy(bview, pAllocator);
+            return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         }
+         dzn_descriptor_heap_write_buffer_view_desc(device, &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
+                                                    bview->srv_bindless_slot, false, bview);
+      }
    }
 
    if (buf->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) {
@@ -1464,6 +1512,16 @@ dzn_buffer_view_create(struct dzn_device *device,
             .Flags = D3D12_BUFFER_UAV_FLAG_NONE,
          },
       };
+
+      if (device->bindless) {
+         bview->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+         if (bview->uav_bindless_slot < 0) {
+            dzn_buffer_view_destroy(bview, pAllocator);
+            return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         }
+         dzn_descriptor_heap_write_buffer_view_desc(device, &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
+                                                    bview->uav_bindless_slot, true, bview);
+      }
    }
 
    *out = dzn_buffer_view_to_handle(bview);
index ae8fbff..ddac019 100644 (file)
@@ -256,6 +256,23 @@ struct dzn_queue {
    uint64_t fence_point;
 };
 
+struct dzn_descriptor_heap { /* Wrapper around one D3D12 descriptor heap. */
+   ID3D12DescriptorHeap *heap; /* released by dzn_descriptor_heap_finish(), which tolerates NULL */
+   SIZE_T cpu_base; /* presumably the CPU handle of slot 0 -- confirm in dzn_descriptor_heap_init() */
+   uint64_t gpu_base; /* presumably the GPU handle of slot 0 -- confirm in dzn_descriptor_heap_init() */
+   uint32_t desc_count; /* number of slots; the device-level slot allocator never hands out an index >= this */
+   uint32_t desc_sz; /* presumably the per-descriptor stride in bytes -- confirm in dzn_descriptor_heap_init() */
+};
+
+struct dzn_device_descriptor_heap { /* Device-owned heap used in bindless mode, plus its slot allocator state. */
+   struct dzn_descriptor_heap heap;
+   mtx_t lock; /* taken by the alloc_slot/free_slot helpers around slot_freelist and next_alloc_slot */
+   struct util_dynarray slot_freelist; /* int slot indices handed back via free_slot; alloc_slot pops from here first */
+   uint32_t next_alloc_slot; /* lowest index never handed out; set to 0 at device creation, bumped by alloc_slot */
+};
+
+#define NUM_POOL_TYPES (D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + 1) /* heap types that get a device-level heap (CBV_SRV_UAV and SAMPLER); parenthesized so the macro expands safely inside larger expressions */
+
 struct dzn_device {
    struct vk_device vk;
    struct vk_device_extension_table enabled_extensions;
@@ -283,6 +300,9 @@ struct dzn_device {
     */
    bool need_swapchain_blits;
    struct dzn_queue *swapchain_queue;
+
+   bool bindless;
+   struct dzn_device_descriptor_heap device_heaps[NUM_POOL_TYPES];
 };
 
 void dzn_meta_finish(struct dzn_device *device);
@@ -345,7 +365,6 @@ enum dzn_cmd_dirty {
 #define MAX_PUSH_CONSTANT_DWORDS 32
 
 #define NUM_BIND_POINT VK_PIPELINE_BIND_POINT_COMPUTE + 1
-#define NUM_POOL_TYPES D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + 1
 
 #define dzn_foreach_pool_type(type) \
    for (D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; \
@@ -369,6 +388,7 @@ struct dzn_descriptor_state {
 
 struct dzn_sampler;
 struct dzn_image_view;
+struct dzn_buffer_view;
 
 struct dzn_buffer_desc {
    VkDescriptorType type;
@@ -380,13 +400,15 @@ struct dzn_buffer_desc {
 #define MAX_DESCS_PER_SAMPLER_HEAP 2048u
 #define MAX_DESCS_PER_CBV_SRV_UAV_HEAP 1000000u
 
-struct dzn_descriptor_heap {
-   ID3D12DescriptorHeap *heap;
-   SIZE_T cpu_base;
-   uint64_t gpu_base;
-   uint32_t desc_count;
-   uint32_t desc_sz;
-};
+VkResult
+dzn_descriptor_heap_init(struct dzn_descriptor_heap *heap,
+                         struct dzn_device *device,
+                         D3D12_DESCRIPTOR_HEAP_TYPE type,
+                         uint32_t desc_count,
+                         bool shader_visible);
+
+void
+dzn_descriptor_heap_finish(struct dzn_descriptor_heap *heap);
 
 D3D12_CPU_DESCRIPTOR_HANDLE
 dzn_descriptor_heap_get_cpu_handle(const struct dzn_descriptor_heap *heap, uint32_t slot);
@@ -403,6 +425,13 @@ dzn_descriptor_heap_write_image_view_desc(struct dzn_device *device,
                                           const struct dzn_image_view *iview);
 
 void
+dzn_descriptor_heap_write_buffer_view_desc(struct dzn_device *device,
+                                           struct dzn_descriptor_heap *heap,
+                                           uint32_t heap_offset,
+                                           bool writeable,
+                                           const struct dzn_buffer_view *bview);
+
+void
 dzn_descriptor_heap_write_buffer_desc(struct dzn_device *device,
                                       struct dzn_descriptor_heap *heap,
                                       uint32_t heap_offset,
@@ -410,6 +439,12 @@ dzn_descriptor_heap_write_buffer_desc(struct dzn_device *device,
                                       const struct dzn_buffer_desc *bdesc);
 
 void
+dzn_descriptor_heap_write_sampler_desc(struct dzn_device *device,
+                                       struct dzn_descriptor_heap *heap,
+                                       uint32_t desc_offset,
+                                       const struct dzn_sampler *sampler);
+
+void
 dzn_descriptor_heap_copy(struct dzn_device *device,
                          struct dzn_descriptor_heap *dst_heap, uint32_t dst_heap_offset,
                          const struct dzn_descriptor_heap *src_heap, uint32_t src_heap_offset,
@@ -449,6 +484,15 @@ dzn_descriptor_heap_pool_alloc_slots(struct dzn_descriptor_heap_pool *pool,
                                      struct dzn_descriptor_heap **heap,
                                      uint32_t *first_slot);
 
+int
+dzn_device_descriptor_heap_alloc_slot(struct dzn_device *device,
+                                      D3D12_DESCRIPTOR_HEAP_TYPE type);
+
+void
+dzn_device_descriptor_heap_free_slot(struct dzn_device *device,
+                                     D3D12_DESCRIPTOR_HEAP_TYPE type,
+                                     int slot);
+
 struct dzn_cmd_buffer_query_range {
    struct dzn_query_pool *qpool;
    uint32_t start, count;
@@ -1025,6 +1069,8 @@ struct dzn_image_view {
    D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
    D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
    D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
+   int srv_bindless_slot;
+   int uav_bindless_slot;
 };
 
 void
@@ -1048,6 +1094,9 @@ struct dzn_buffer {
 
    D3D12_BARRIER_ACCESS valid_access;
    D3D12_GPU_VIRTUAL_ADDRESS gpuva;
+
+   int cbv_bindless_slot;
+   int uav_bindless_slot;
 };
 
 DXGI_FORMAT
@@ -1075,12 +1124,15 @@ struct dzn_buffer_view {
 
    D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
    D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
+   int srv_bindless_slot;
+   int uav_bindless_slot;
 };
 
 struct dzn_sampler {
    struct vk_object_base base;
    D3D12_SAMPLER_DESC2 desc;
    D3D12_STATIC_BORDER_COLOR static_border_color;
+   int bindless_slot;
 };
 
 /* This is defined as a macro so that it works for both