 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * Copyright © 2021 Valve Corporation
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
#include "zink_context.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "util/u_hash_table.h"
#if !defined(__APPLE__) && !defined(_WIN32)
#define ZINK_USE_DMABUF
struct zink_sparse_backing_chunk {
 * Sub-allocation information for a real buffer used as backing memory of a
struct zink_sparse_backing {
   struct list_head list;
   /* Sorted list of free chunks. */
   struct zink_sparse_backing_chunk *chunks;
struct zink_sparse_commitment {
   struct zink_sparse_backing *backing;
   struct zink_bo *buffer;
   struct zink_bo *entries;
ALWAYS_INLINE static struct zink_slab *
zink_slab(struct pb_slab *pslab)
   return (struct zink_slab*)pslab;
static struct pb_slabs *
get_slabs(struct zink_screen *screen, uint64_t size, enum zink_alloc_flag flags)
   //struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
   //   screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *bo_slabs = screen->pb.bo_slabs;
   /* Find the correct slab allocator for the given size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      struct pb_slabs *slabs = &bo_slabs[i];
      if (size <= 1ULL << (slabs->min_order + slabs->num_orders - 1))
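/* Each of the NUM_SLAB_ALLOCATORS slab managers covers a contiguous range of
 * power-of-two entry sizes, so a request is routed to the first manager whose
 * largest order can still hold it; how the order range is split between the
 * managers is decided in zink_bo_init() below.
 */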
/* Return the power of two size of a slab entry matching the input size. */
get_slab_pot_entry_size(struct zink_screen *screen, unsigned size)
   unsigned entry_size = util_next_power_of_two(size);
   unsigned min_entry_size = 1 << screen->pb.bo_slabs[0].min_order;
   return MAX2(entry_size, min_entry_size);
/* Return the slab entry alignment. */
static unsigned get_slab_entry_alignment(struct zink_screen *screen, unsigned size)
   unsigned entry_size = get_slab_pot_entry_size(screen, size);
   if (size <= entry_size * 3 / 4)
      return entry_size / 4;
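/* Entries at or below 3/4 of their power-of-two bucket may be packed at
 * 3/4-of-power-of-two offsets inside a slab, so they are only guaranteed a
 * quarter of the bucket's alignment. As a rough example: a 96 KiB request
 * rounds up to a 128 KiB bucket, and since 96 KiB <= 3/4 * 128 KiB the
 * reported alignment is 32 KiB rather than 128 KiB.
 */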
bo_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);
#ifdef ZINK_USE_DMABUF
   if (bo->mem && !bo->u.real.use_reusable_pool) {
      simple_mtx_lock(&bo->u.real.export_lock);
      list_for_each_entry_safe(struct bo_export, export, &bo->u.real.exports, link) {
         struct drm_gem_close args = { .handle = export->gem_handle };
         drmIoctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &args);
         list_del(&export->link);
      simple_mtx_unlock(&bo->u.real.export_lock);
      simple_mtx_destroy(&bo->u.real.export_lock);
   if (!bo->u.real.is_user_ptr && bo->u.real.cpu_ptr) {
      bo->u.real.map_count = 1;
      bo->u.real.cpu_ptr = NULL;
      zink_bo_unmap(screen, bo);
   VKSCR(FreeMemory)(screen->dev, bo->mem, NULL);
   simple_mtx_destroy(&bo->lock);
bo_can_reclaim(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);
   return zink_screen_usage_check_completion(screen, bo->reads) && zink_screen_usage_check_completion(screen, bo->writes);
bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
   struct zink_bo *bo = container_of(entry, struct zink_bo, u.slab.entry);
   return bo_can_reclaim(priv, &bo->base);
bo_slab_free(struct zink_screen *screen, struct pb_slab *pslab)
   struct zink_slab *slab = zink_slab(pslab);
   ASSERTED unsigned slab_size = slab->buffer->base.size;
   assert(slab->base.num_entries * slab->entry_size <= slab_size);
   zink_bo_unref(screen, slab->buffer);
bo_slab_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);
   //if (bo->base.usage & RADEON_FLAG_ENCRYPTED)
   //   pb_slab_free(get_slabs(screen, bo->base.size, RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry);
   pb_slab_free(get_slabs(screen, bo->base.size, 0), &bo->u.slab.entry);
clean_up_buffer_managers(struct zink_screen *screen)
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      pb_slabs_reclaim(&screen->pb.bo_slabs[i]);
      //if (screen->info.has_tmz_support)
      //   pb_slabs_reclaim(&screen->bo_slabs_encrypted[i]);
   pb_cache_release_all_buffers(&screen->pb.bo_cache);
get_optimal_alignment(struct zink_screen *screen, uint64_t size, unsigned alignment)
   /* Increase the alignment for faster address translation and better memory
      alignment = MAX2(alignment, 4096);
      unsigned msb = util_last_bit(size);
      alignment = MAX2(alignment, 1u << (msb - 1));
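/* Example of the bump above: a 3000-byte allocation has util_last_bit(3000) == 12,
 * so its alignment is raised to at least 1 << 11 == 2048 bytes, while anything
 * of 4096 bytes or more is simply aligned to at least 4096 bytes.
 */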
bo_destroy_or_cache(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);
   assert(bo->mem); /* slab buffers have a separate vtbl */
   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(bo->cache_entry);
      bo_destroy(screen, pbuf);
static const struct pb_vtbl bo_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_destroy_or_cache
   /* other functions are never called */
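/* A "real" BO is one vkAllocateMemory allocation. For host-visible memory
 * types the size and alignment are padded to minMemoryMapAlignment so mapped
 * pointers stay legal, requests larger than the target heap are rejected up
 * front, and BOs created without a pNext chain get a pb_cache entry so they
 * can be recycled instead of freed immediately.
 */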
static struct zink_bo *
bo_create_internal(struct zink_screen *screen,
   struct zink_bo *bo = NULL;
   /* too big for vk alloc */
   if (size > UINT32_MAX)
   alignment = get_optimal_alignment(screen, size, alignment);
   VkMemoryAllocateFlagsInfo ai;
   ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
   ai.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
   VkMemoryAllocateInfo mai;
   mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
   if (screen->info.have_KHR_buffer_device_address)
   mai.allocationSize = size;
   mai.memoryTypeIndex = heap_idx;
   if (screen->info.mem_props.memoryTypes[mai.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
      alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment);
      mai.allocationSize = align64(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment);
   unsigned heap = screen->info.mem_props.memoryTypes[heap_idx].heapIndex;
   if (mai.allocationSize > screen->info.mem_props.memoryHeaps[heap].size) {
      mesa_loge("zink: can't allocate %"PRIu64" bytes from heap that's only %"PRIu64" bytes!\n", mai.allocationSize, screen->info.mem_props.memoryHeaps[heap].size);
   /* all non-suballocated bos can be cached */
   init_pb_cache = !pNext;
   bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry));
   VkResult ret = VKSCR(AllocateMemory)(screen->dev, &mai, NULL, &bo->mem);
   if (!zink_screen_handle_vkresult(screen, ret)) {
      mesa_loge("zink: couldn't allocate memory: heap=%u size=%" PRIu64, heap_idx, size);
      bo->u.real.use_reusable_pool = true;
      pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base, heap_idx);
#ifdef ZINK_USE_DMABUF
   list_inithead(&bo->u.real.exports);
   simple_mtx_init(&bo->u.real.export_lock, mtx_plain);
   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.size = mai.allocationSize;
   bo->base.vtbl = &bo_vtbl;
   bo->base.placement = heap_idx;
   bo->base.usage = flags;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
   bo_destroy(screen, (void*)bo);
 * Attempt to allocate the given number of backing pages. Fewer pages may be
 * allocated (depending on the fragmentation of existing backing buffers),
 * which will be reflected by a change to *pnum_pages.
static struct zink_sparse_backing *
sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo,
                     uint32_t *pstart_page, uint32_t *pnum_pages)
   struct zink_sparse_backing *best_backing;
   uint32_t best_num_pages;
   /* This is a very simple and inefficient best-fit algorithm. */
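   /* The candidate chunk is replaced while it is still smaller than the
    * request by any larger chunk, and once it is larger than the request by
    * any smaller chunk, so the search tends to settle on a free range whose
    * size is close to *pnum_pages (hence "simple and inefficient").
    */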
   list_for_each_entry(struct zink_sparse_backing, backing, &bo->u.sparse.backing, list) {
      for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
         uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
         if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
             (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
            best_backing = backing;
            best_num_pages = cur_num_pages;
   /* Allocate a new backing buffer if necessary. */
      struct pb_buffer *buf;
      best_backing = CALLOC_STRUCT(zink_sparse_backing);
      best_backing->max_chunks = 4;
      best_backing->chunks = CALLOC(best_backing->max_chunks,
                                    sizeof(*best_backing->chunks));
      if (!best_backing->chunks) {
      assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE));
      size = MIN3(bo->base.size / 16,
                  bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE);
      size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
      buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE,
                           ZINK_HEAP_DEVICE_LOCAL, 0, screen->heap_map[ZINK_HEAP_DEVICE_LOCAL][0], NULL);
         FREE(best_backing->chunks);
      /* We might have gotten a bigger buffer than requested via caching. */
      pages = buf->size / ZINK_SPARSE_BUFFER_PAGE_SIZE;
      best_backing->bo = zink_bo(buf);
      best_backing->num_chunks = 1;
      best_backing->chunks[0].begin = 0;
      best_backing->chunks[0].end = pages;
      list_add(&best_backing->list, &bo->u.sparse.backing);
      bo->u.sparse.num_backing_pages += pages;
      best_num_pages = pages;
   *pnum_pages = MIN2(*pnum_pages, best_num_pages);
   *pstart_page = best_backing->chunks[best_idx].begin;
   best_backing->chunks[best_idx].begin += *pnum_pages;
   if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
      memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
              sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
      best_backing->num_chunks--;
sparse_free_backing_buffer(struct zink_screen *screen, struct zink_bo *bo,
                           struct zink_sparse_backing *backing)
   bo->u.sparse.num_backing_pages -= backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE;
   list_del(&backing->list);
   zink_bo_unref(screen, backing->bo);
   FREE(backing->chunks);
 * Return a range of pages from the given backing buffer back into the
sparse_backing_free(struct zink_screen *screen, struct zink_bo *bo,
                    struct zink_sparse_backing *backing,
                    uint32_t start_page, uint32_t num_pages)
   uint32_t end_page = start_page + num_pages;
   unsigned high = backing->num_chunks;
   /* Find the first chunk with begin >= start_page. */
      unsigned mid = low + (high - low) / 2;
      if (backing->chunks[mid].begin >= start_page)
   assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
   assert(low == 0 || backing->chunks[low - 1].end <= start_page);
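   /* Merge the freed range [start_page, end_page) back into the sorted chunk
    * list: extend the previous chunk if it ends exactly at start_page (also
    * fusing with the following chunk if the two now touch), otherwise extend
    * the following chunk downward, otherwise insert a new chunk, growing the
    * chunk array first if it is full.
    */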
   if (low > 0 && backing->chunks[low - 1].end == start_page) {
      backing->chunks[low - 1].end = end_page;
      if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
         backing->chunks[low - 1].end = backing->chunks[low].end;
         memmove(&backing->chunks[low], &backing->chunks[low + 1],
                 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
         backing->num_chunks--;
   } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
      backing->chunks[low].begin = start_page;
      if (backing->num_chunks >= backing->max_chunks) {
         unsigned new_max_chunks = 2 * backing->max_chunks;
         struct zink_sparse_backing_chunk *new_chunks =
            REALLOC(backing->chunks,
                    sizeof(*backing->chunks) * backing->max_chunks,
                    sizeof(*backing->chunks) * new_max_chunks);
         backing->max_chunks = new_max_chunks;
         backing->chunks = new_chunks;
      memmove(&backing->chunks[low + 1], &backing->chunks[low],
              sizeof(*backing->chunks) * (backing->num_chunks - low));
      backing->chunks[low].begin = start_page;
      backing->chunks[low].end = end_page;
      backing->num_chunks++;
   if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
       backing->chunks[0].end == backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE)
      sparse_free_backing_buffer(screen, bo, backing);
bo_sparse_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);
   assert(!bo->mem && bo->base.usage & ZINK_ALLOC_SPARSE);
   while (!list_is_empty(&bo->u.sparse.backing)) {
      sparse_free_backing_buffer(screen, bo,
                                 container_of(bo->u.sparse.backing.next,
                                              struct zink_sparse_backing, list));
   FREE(bo->u.sparse.commitments);
   simple_mtx_destroy(&bo->lock);
static const struct pb_vtbl bo_sparse_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_sparse_destroy
   /* other functions are never called */
static struct pb_buffer *
bo_sparse_create(struct zink_screen *screen, uint64_t size)
   /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
    * that exceed this limit. This is not really a restriction: we don't have
    * that much virtual address space anyway.
   if (size > (uint64_t)INT32_MAX * ZINK_SPARSE_BUFFER_PAGE_SIZE)
   bo = CALLOC_STRUCT(zink_bo);
   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->base.size = size;
   bo->base.vtbl = &bo_sparse_vtbl;
   bo->base.placement = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
   bo->base.usage = ZINK_ALLOC_SPARSE;
   bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
                                     sizeof(*bo->u.sparse.commitments));
   if (!bo->u.sparse.commitments)
      goto error_alloc_commitments;
   list_inithead(&bo->u.sparse.backing);
error_alloc_commitments:
   simple_mtx_destroy(&bo->lock);
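/* Allocation entry point: sparse buffers get a VA-only bookkeeping object via
 * bo_sparse_create(), small non-sparse requests are suballocated from the slab
 * managers, and everything else is either reclaimed from the pb_cache or
 * allocated fresh with bo_create_internal(), with a buffer-manager cleanup and
 * one retry if the first attempt fails.
 */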
zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, unsigned heap_idx, const void *pNext)
   /* pull in sparse flag */
   flags |= zink_alloc_flags_from_heap(heap);
   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
   //   screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;
   struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
   unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);
   /* Sub-allocate small buffers from slabs. */
   if (!(flags & (ZINK_ALLOC_NO_SUBALLOC | ZINK_ALLOC_SPARSE)) &&
       size <= max_slab_entry_size) {
      struct pb_slab_entry *entry;
      if (heap < 0 || heap >= ZINK_HEAP_MAX)
      unsigned alloc_size = size;
      /* Always use slabs for sizes less than 4 KB because the kernel aligns
       * everything to 4 KB.
      if (size < alignment && alignment <= 4 * 1024)
         alloc_size = alignment;
      if (alignment > get_slab_entry_alignment(screen, alloc_size)) {
         /* 3/4 allocations can return too small alignment. Try again with a power of two
         unsigned pot_size = get_slab_pot_entry_size(screen, alloc_size);
         if (alignment <= pot_size) {
            /* This size works but wastes some memory to fulfil the alignment. */
            alloc_size = pot_size;
            goto no_slab; /* can't fulfil alignment requirements */
      struct pb_slabs *slabs = get_slabs(screen, alloc_size, flags);
      bool reclaim_all = false;
      if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE && !screen->resizable_bar) {
         unsigned low_bound = 128 * 1024 * 1024; //128MB is a very small BAR
         if (screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY)
            low_bound *= 2; //nvidia has fat textures or something
         reclaim_all = screen->info.mem_props.memoryHeaps[heap_idx].size <= low_bound;
      entry = pb_slab_alloc_reclaimed(slabs, alloc_size, heap_idx, reclaim_all);
         /* Clean up buffer managers and try again. */
         clean_up_buffer_managers(screen);
         entry = pb_slab_alloc_reclaimed(slabs, alloc_size, heap_idx, true);
      bo = container_of(entry, struct zink_bo, u.slab.entry);
      pipe_reference_init(&bo->base.reference, 1);
      bo->base.size = size;
      assert(alignment <= 1 << bo->base.alignment_log2);
   if (flags & ZINK_ALLOC_SPARSE) {
      assert(ZINK_SPARSE_BUFFER_PAGE_SIZE % alignment == 0);
      return bo_sparse_create(screen, size);
   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
   if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE) {
      size = align64(size, screen->info.props.limits.minMemoryMapAlignment);
      alignment = align(alignment, screen->info.props.limits.minMemoryMapAlignment);
   bool use_reusable_pool = !(flags & ZINK_ALLOC_NO_SUBALLOC);
   if (use_reusable_pool) {
      /* Get a buffer from the cache. */
      bo = (struct zink_bo*)
           pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, heap_idx);
   /* Create a new one. */
   bo = bo_create_internal(screen, size, alignment, heap_idx, flags, pNext);
      /* Clean up buffer managers and try again. */
      clean_up_buffer_managers(screen);
      bo = bo_create_internal(screen, size, alignment, heap_idx, flags, pNext);
zink_bo_map(struct zink_screen *screen, struct zink_bo *bo)
   struct zink_bo *real;
      real = bo->u.slab.real;
      offset = bo->offset - real->offset;
   cpu = p_atomic_read(&real->u.real.cpu_ptr);
      simple_mtx_lock(&real->lock);
      /* Must re-check due to the possibility of a race. Re-check need not
       * be atomic thanks to the lock. */
      cpu = real->u.real.cpu_ptr;
         VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.size, 0, &cpu);
         if (result != VK_SUCCESS) {
            mesa_loge("ZINK: vkMapMemory failed (%s)", vk_Result_to_str(result));
            simple_mtx_unlock(&real->lock);
         p_atomic_set(&real->u.real.cpu_ptr, cpu);
      simple_mtx_unlock(&real->lock);
   p_atomic_inc(&real->u.real.map_count);
   return (uint8_t*)cpu + offset;
zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo)
   struct zink_bo *real = bo->mem ? bo : bo->u.slab.real;
   assert(real->u.real.map_count != 0 && "too many unmaps");
   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      p_atomic_set(&real->u.real.cpu_ptr, NULL);
      VKSCR(UnmapMemory)(screen->dev, real->mem);
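/* zink_bo_map()/zink_bo_unmap() are persistent and reference counted on the
 * underlying real BO: the first map maps the whole VkDeviceMemory and later
 * maps just bump map_count and add the slab offset, so each map should be
 * balanced by an unmap. A rough usage sketch (hypothetical caller, with
 * write_offset/data/data_size standing in for real values):
 *
 *    uint8_t *ptr = zink_bo_map(screen, bo);
 *    if (ptr) {
 *       memcpy(ptr + write_offset, data, data_size);
 *       zink_bo_unmap(screen, bo);
 *    }
 */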
get_semaphore(struct zink_screen *screen)
   VkSemaphoreCreateInfo sci = {
      VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
   VkResult ret = VKSCR(CreateSemaphore)(screen->dev, &sci, NULL, &sem);
   return ret == VK_SUCCESS ? sem : VK_NULL_HANDLE;
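/* Every sparse bind below is submitted to the dedicated sparse queue with a
 * freshly created semaphore as its signal and the previous bind's semaphore
 * (if any) as its wait, so the binds issued for one commit operation form a
 * chain; the last semaphore in that chain is what ends up being returned to
 * the caller of zink_bo_commit() through its 'sem' out-parameter.
 */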
buffer_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, uint32_t size, bool commit, VkSemaphore wait)
   VkSemaphore sem = get_semaphore(screen);
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.bufferBindCount = res->obj->storage_buffer ? 2 : 1;
   sparse.waitSemaphoreCount = !!wait;
   sparse.pWaitSemaphores = &wait;
   sparse.signalSemaphoreCount = 1;
   sparse.pSignalSemaphores = &sem;
   VkSparseBufferMemoryBindInfo sparse_bind[2];
   sparse_bind[0].buffer = res->obj->buffer;
   sparse_bind[1].buffer = res->obj->storage_buffer;
   sparse_bind[0].bindCount = 1;
   sparse_bind[1].bindCount = 1;
   sparse.pBufferBinds = sparse_bind;
   VkSparseMemoryBind mem_bind;
   mem_bind.resourceOffset = offset;
   mem_bind.size = MIN2(res->base.b.width0 - offset, size);
   mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE;
   mem_bind.memoryOffset = bo_offset * ZINK_SPARSE_BUFFER_PAGE_SIZE + (commit ? (bo->mem ? 0 : bo->offset) : 0);
   sparse_bind[0].pBinds = &mem_bind;
   sparse_bind[1].pBinds = &mem_bind;
   VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE);
   if (zink_screen_handle_vkresult(screen, ret))
   VKSCR(DestroySemaphore)(screen->dev, sem, NULL);
   return VK_NULL_HANDLE;
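/* Buffer commits operate on whole ZINK_SPARSE_BUFFER_PAGE_SIZE pages: the
 * commitments array records which VA page is backed by which page of which
 * backing buffer. Committing walks the requested range, skips pages that are
 * already committed and fills each uncommitted span with chunks obtained from
 * sparse_backing_alloc(); decommitting unbinds the range and returns the
 * grouped spans to their backing buffers via sparse_backing_free().
 */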
buffer_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit, VkSemaphore *sem)
   struct zink_bo *bo = res->obj->bo;
   assert(offset % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0);
   assert(offset <= bo->base.size);
   assert(size <= bo->base.size - offset);
   assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == bo->base.size);
   struct zink_sparse_commitment *comm = bo->u.sparse.commitments;
   uint32_t va_page = offset / ZINK_SPARSE_BUFFER_PAGE_SIZE;
   uint32_t end_va_page = va_page + DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
   VkSemaphore cur_sem = VK_NULL_HANDLE;
      while (va_page < end_va_page) {
         uint32_t span_va_page;
         /* Skip pages that are already committed. */
         if (comm[va_page].backing) {
         /* Determine length of uncommitted span. */
         span_va_page = va_page;
         while (va_page < end_va_page && !comm[va_page].backing)
         /* Fill the uncommitted span with chunks of backing memory. */
         while (span_va_page < va_page) {
            struct zink_sparse_backing *backing;
            uint32_t backing_start, backing_size;
            backing_size = va_page - span_va_page;
            backing = sparse_backing_alloc(screen, bo, &backing_start, &backing_size);
            cur_sem = buffer_commit_single(screen, res, backing->bo, backing_start,
                                           (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                                           (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true, cur_sem);
               ok = sparse_backing_free(screen, bo, backing, backing_start, backing_size);
               assert(ok && "sufficient memory should already be allocated");
            while (backing_size) {
               comm[span_va_page].backing = backing;
               comm[span_va_page].page = backing_start;
      uint32_t base_page = va_page;
      while (va_page < end_va_page) {
         struct zink_sparse_backing *backing;
         uint32_t backing_start;
         /* Skip pages that are already uncommitted. */
         if (!comm[va_page].backing) {
         cur_sem = buffer_commit_single(screen, res, NULL, 0,
                                        (uint64_t)base_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                                        (uint64_t)(end_va_page - base_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false, cur_sem);
         /* Group contiguous spans of pages. */
         backing = comm[va_page].backing;
         backing_start = comm[va_page].page;
         comm[va_page].backing = NULL;
         while (va_page < end_va_page &&
                comm[va_page].backing == backing &&
                comm[va_page].page == backing_start + span_pages) {
            comm[va_page].backing = NULL;
         if (!sparse_backing_free(screen, bo, backing, backing_start, span_pages)) {
            /* Couldn't allocate tracking data structures, so we have to leak */
            fprintf(stderr, "zink: leaking sparse backing memory\n");
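/* Non-miptail image bindings are accumulated into a fixed-size array of
 * VkSparseImageMemoryBind entries and flushed in batches, so one
 * vkQueueBindSparse submission can cover up to NUM_BATCHED_BINDS granules
 * instead of one submission per granule.
 */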
texture_commit_single(struct zink_screen *screen, struct zink_resource *res, VkSparseImageMemoryBind *ibind, unsigned num_binds, bool commit, VkSemaphore wait)
   VkSemaphore sem = get_semaphore(screen);
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.imageBindCount = 1;
   sparse.waitSemaphoreCount = !!wait;
   sparse.pWaitSemaphores = &wait;
   sparse.signalSemaphoreCount = 1;
   sparse.pSignalSemaphores = &sem;
   VkSparseImageMemoryBindInfo sparse_ibind;
   sparse_ibind.image = res->obj->image;
   sparse_ibind.bindCount = num_binds;
   sparse_ibind.pBinds = ibind;
   sparse.pImageBinds = &sparse_ibind;
   VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE);
   if (zink_screen_handle_vkresult(screen, ret))
   VKSCR(DestroySemaphore)(screen->dev, sem, NULL);
   return VK_NULL_HANDLE;
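/* Mip levels at or beyond imageMipTailFirstLod are not bound per-granule:
 * they live in the opaque mip tail, so their commits go through a
 * VkSparseImageOpaqueMemoryBindInfo at imageMipTailOffset (plus a per-layer
 * imageMipTailStride) rather than through per-tile image binds.
 */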
texture_commit_miptail(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, bool commit, VkSemaphore wait)
   VkSemaphore sem = get_semaphore(screen);
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.imageOpaqueBindCount = 1;
   sparse.waitSemaphoreCount = !!wait;
   sparse.pWaitSemaphores = &wait;
   sparse.signalSemaphoreCount = 1;
   sparse.pSignalSemaphores = &sem;
   VkSparseImageOpaqueMemoryBindInfo sparse_bind;
   sparse_bind.image = res->obj->image;
   sparse_bind.bindCount = 1;
   sparse.pImageOpaqueBinds = &sparse_bind;
   VkSparseMemoryBind mem_bind;
   mem_bind.resourceOffset = offset;
   mem_bind.size = MIN2(ZINK_SPARSE_BUFFER_PAGE_SIZE, res->sparse.imageMipTailSize - offset);
   mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE;
   mem_bind.memoryOffset = bo_offset + (commit ? (bo->mem ? 0 : bo->offset) : 0);
   sparse_bind.pBinds = &mem_bind;
   VkResult ret = VKSCR(QueueBindSparse)(screen->queue_sparse, 1, &sparse, VK_NULL_HANDLE);
   if (zink_screen_handle_vkresult(screen, ret))
   VKSCR(DestroySemaphore)(screen->dev, sem, NULL);
   return VK_NULL_HANDLE;
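/* Entry point for (de)committing a region of a sparse resource. Buffers are
 * handled page-wise by buffer_bo_commit(); images walk the box in units of
 * the sparse image granularity, derive a flat va_page index for each granule
 * from the per-level page counts accumulated in va_page_offset, and then
 * commit or release backing memory per granule much like the buffer path.
 */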
zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, unsigned level, struct pipe_box *box, bool commit, VkSemaphore *sem)
   struct zink_bo *bo = res->obj->bo;
   VkSemaphore cur_sem = VK_NULL_HANDLE;
   if (screen->faked_e5sparse && res->base.b.format == PIPE_FORMAT_R9G9B9E5_FLOAT)
   simple_mtx_lock(&screen->queue_lock);
   simple_mtx_lock(&bo->lock);
   if (res->base.b.target == PIPE_BUFFER) {
      ok = buffer_bo_commit(screen, res, box->x, box->width, commit, sem);
   int gwidth, gheight, gdepth;
   gwidth = res->sparse.formatProperties.imageGranularity.width;
   gheight = res->sparse.formatProperties.imageGranularity.height;
   gdepth = res->sparse.formatProperties.imageGranularity.depth;
   assert(gwidth && gheight && gdepth);
   struct zink_sparse_commitment *comm = bo->u.sparse.commitments;
   VkImageSubresource subresource = { res->aspect, level, 0 };
   unsigned nwidth = DIV_ROUND_UP(box->width, gwidth);
   unsigned nheight = DIV_ROUND_UP(box->height, gheight);
   unsigned ndepth = DIV_ROUND_UP(box->depth, gdepth);
   VkExtent3D lastBlockExtent = {
      (box->width % gwidth) ? box->width % gwidth : gwidth,
      (box->height % gheight) ? box->height % gheight : gheight,
      (box->depth % gdepth) ? box->depth % gdepth : gdepth
#define NUM_BATCHED_BINDS 50
   VkSparseImageMemoryBind ibind[NUM_BATCHED_BINDS];
   uint32_t backing_start[NUM_BATCHED_BINDS], backing_size[NUM_BATCHED_BINDS];
   struct zink_sparse_backing *backing[NUM_BATCHED_BINDS];
   bool commits_pending = false;
   uint32_t va_page_offset = 0;
   for (unsigned l = 0; l < level; l++) {
      unsigned mipwidth = DIV_ROUND_UP(MAX2(res->base.b.width0 >> l, 1), gwidth);
      unsigned mipheight = DIV_ROUND_UP(MAX2(res->base.b.height0 >> l, 1), gheight);
      unsigned mipdepth = DIV_ROUND_UP(res->base.b.array_size > 1 ? res->base.b.array_size : MAX2(res->base.b.depth0 >> l, 1), gdepth);
      va_page_offset += mipwidth * mipheight * mipdepth;
   for (unsigned d = 0; d < ndepth; d++) {
      for (unsigned h = 0; h < nheight; h++) {
         for (unsigned w = 0; w < nwidth; w++) {
            ibind[i].subresource = subresource;
            ibind[i].offset.x = w * gwidth;
            ibind[i].offset.y = h * gheight;
            if (res->base.b.array_size > 1) {
               ibind[i].subresource.arrayLayer = d * gdepth;
               ibind[i].offset.z = 0;
               ibind[i].offset.z = d * gdepth;
            ibind[i].extent.width = (w == nwidth - 1) ? lastBlockExtent.width : gwidth;
            ibind[i].extent.height = (h == nheight - 1) ? lastBlockExtent.height : gheight;
            ibind[i].extent.depth = (d == ndepth - 1 && res->base.b.target != PIPE_TEXTURE_CUBE) ? lastBlockExtent.depth : gdepth;
            uint32_t va_page = va_page_offset +
                               (d + (box->z / gdepth)) * ((MAX2(res->base.b.width0 >> level, 1) / gwidth) * (MAX2(res->base.b.height0 >> level, 1) / gheight)) +
                               (h + (box->y / gheight)) * (MAX2(res->base.b.width0 >> level, 1) / gwidth) +
                               (w + (box->x / gwidth));
            uint32_t end_va_page = va_page + 1;
               while (va_page < end_va_page) {
                  uint32_t span_va_page;
                  /* Skip pages that are already committed. */
                  if (comm[va_page].backing) {
                  /* Determine length of uncommitted span. */
                  span_va_page = va_page;
                  while (va_page < end_va_page && !comm[va_page].backing)
                  /* Fill the uncommitted span with chunks of backing memory. */
                  while (span_va_page < va_page) {
                     backing_size[i] = va_page - span_va_page;
                     backing[i] = sparse_backing_alloc(screen, bo, &backing_start[i], &backing_size[i]);
                     if (level >= res->sparse.imageMipTailFirstLod) {
                        uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride;
                        cur_sem = texture_commit_miptail(screen, res, backing[i]->bo, backing_start[i], offset, commit, cur_sem);
                        ibind[i].memory = backing[i]->bo->mem ? backing[i]->bo->mem : backing[i]->bo->u.slab.real->mem;
                        ibind[i].memoryOffset = backing_start[i] * ZINK_SPARSE_BUFFER_PAGE_SIZE +
                                                (backing[i]->bo->mem ? 0 : backing[i]->bo->offset);
                        commits_pending = true;
                     while (backing_size[i]) {
                        comm[span_va_page].backing = backing[i];
                        comm[span_va_page].page = backing_start[i];
               ibind[i].memory = VK_NULL_HANDLE;
               ibind[i].memoryOffset = 0;
               while (va_page < end_va_page) {
                  /* Skip pages that are already uncommitted. */
                  if (!comm[va_page].backing) {
                  /* Group contiguous spans of pages. */
                  backing[i] = comm[va_page].backing;
                  backing_start[i] = comm[va_page].page;
                  comm[va_page].backing = NULL;
                  backing_size[i] = 1;
                  while (va_page < end_va_page &&
                         comm[va_page].backing == backing[i] &&
                         comm[va_page].page == backing_start[i] + backing_size[i]) {
                     comm[va_page].backing = NULL;
                  if (level >= res->sparse.imageMipTailFirstLod) {
                     uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride;
                     cur_sem = texture_commit_miptail(screen, res, NULL, 0, offset, commit, cur_sem);
                  commits_pending = true;
            if (i == ARRAY_SIZE(ibind)) {
               cur_sem = texture_commit_single(screen, res, ibind, ARRAY_SIZE(ibind), commit, cur_sem);
               for (unsigned s = 0; s < i; s++) {
                  ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]);
                     /* Couldn't allocate tracking data structures, so we have to leak */
                     fprintf(stderr, "zink: leaking sparse backing memory\n");
               commits_pending = false;
   if (commits_pending) {
      cur_sem = texture_commit_single(screen, res, ibind, i, commit, cur_sem);
      for (unsigned s = 0; s < i; s++) {
         ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]);
            /* Couldn't allocate tracking data structures, so we have to leak */
            fprintf(stderr, "zink: leaking sparse backing memory\n");
   simple_mtx_unlock(&bo->lock);
   simple_mtx_unlock(&screen->queue_lock);
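/* Each real BO keeps a list of its dma-buf exports (one bo_export per DRM fd)
 * guarded by export_lock, so a GEM handle is created at most once per device
 * fd here and the handles can be closed again when the BO is destroyed.
 */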
zink_bo_get_kms_handle(struct zink_screen *screen, struct zink_bo *bo, int fd, uint32_t *handle)
#ifdef ZINK_USE_DMABUF
   assert(bo->mem && !bo->u.real.use_reusable_pool);
   simple_mtx_lock(&bo->u.real.export_lock);
   list_for_each_entry(struct bo_export, export, &bo->u.real.exports, link) {
      if (export->drm_fd == fd) {
         simple_mtx_unlock(&bo->u.real.export_lock);
         *handle = export->gem_handle;
   struct bo_export *export = CALLOC_STRUCT(bo_export);
      simple_mtx_unlock(&bo->u.real.export_lock);
   bool success = drmPrimeFDToHandle(screen->drm_fd, fd, handle) == 0;
      list_addtail(&export->link, &bo->u.real.exports);
      export->gem_handle = *handle;
      export->drm_fd = screen->drm_fd;
      mesa_loge("zink: failed drmPrimeFDToHandle %s", strerror(errno));
   simple_mtx_unlock(&bo->u.real.export_lock);
static const struct pb_vtbl bo_slab_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_slab_destroy
   /* other functions are never called */
static struct pb_slab *
bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index, bool encrypted)
   struct zink_screen *screen = priv;
   unsigned slab_size = 0;
   struct zink_slab *slab = CALLOC_STRUCT(zink_slab);
   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
   //   screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;
   /* Determine the slab buffer size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1);
      if (entry_size <= max_entry_size) {
         /* The slab size is twice the size of the largest possible entry. */
         slab_size = max_entry_size * 2;
         if (!util_is_power_of_two_nonzero(entry_size)) {
            assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
            /* If the entry size is 3/4 of a power of two, we would waste space and not gain
             * anything if we allocated only twice the power of two for the backing buffer:
             *   2 * 3/4 = 1.5 usable with buffer size 2
             * Allocating 5 times the entry size leads us to the next power of two and results
             * in a much better memory utilization:
             *   5 * 3/4 = 3.75 usable with buffer size 4
            if (entry_size * 5 > slab_size)
               slab_size = util_next_power_of_two(entry_size * 5);
   assert(slab_size != 0);
   slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size,
                                         zink_heap_from_domain_flags(screen->info.mem_props.memoryTypes[heap].propertyFlags, 0),
   slab_size = slab->buffer->base.size;
   slab->base.num_entries = slab_size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   list_inithead(&slab->base.free);
   base_id = p_atomic_fetch_add(&screen->pb.next_bo_unique_id, slab->base.num_entries);
   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct zink_bo *bo = &slab->entries[i];
      simple_mtx_init(&bo->lock, mtx_plain);
      bo->base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size));
      bo->base.size = entry_size;
      bo->base.vtbl = &bo_slab_vtbl;
      bo->offset = slab->buffer->offset + i * entry_size;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.entry.entry_size = entry_size;
      if (slab->buffer->mem) {
         /* The slab is not suballocated. */
         bo->u.slab.real = slab->buffer;
         /* The slab is allocated out of a bigger slab. */
         bo->u.slab.real = slab->buffer->u.slab.real;
         assert(bo->u.slab.real->mem);
      bo->base.placement = bo->u.slab.real->base.placement;
      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   /* Wasted alignment due to slabs with 3/4 allocations being aligned to a power of two. */
   assert(slab->base.num_entries * entry_size <= slab_size);
   zink_bo_unref(screen, slab->buffer);
static struct pb_slab *
bo_slab_alloc_normal(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
   return bo_slab_alloc(priv, heap, entry_size, group_index, false);
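/* The slab entry size orders between MIN_SLAB_ORDER (2^8 = 256 bytes) and 20
 * (1 MiB entries in 2 MiB slabs) are split evenly across the slab allocators.
 * Purely as an illustration, with three allocators the ranges would come out
 * to roughly 256 B to 4 KiB, 8 KiB to 128 KiB, and 256 KiB to 1 MiB; the
 * actual split depends on NUM_SLAB_ALLOCATORS.
 */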
zink_bo_init(struct zink_screen *screen)
   uint64_t total_mem = 0;
   for (uint32_t i = 0; i < screen->info.mem_props.memoryHeapCount; ++i)
      total_mem += screen->info.mem_props.memoryHeaps[i].size;
   /* Create managers. */
   pb_cache_init(&screen->pb.bo_cache, ZINK_HEAP_MAX,
                 total_mem / 8, screen,
                 (void*)bo_destroy, (void*)bo_can_reclaim);
   unsigned min_slab_order = MIN_SLAB_ORDER; /* 256 bytes */
   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                            NUM_SLAB_ALLOCATORS;
   /* Divide the size order range among slab managers. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
      if (!pb_slabs_init(&screen->pb.bo_slabs[i],
                         min_order, max_order,
                         ZINK_HEAP_MAX, true,
                         bo_can_reclaim_slab,
                         bo_slab_alloc_normal,
                         (void*)bo_slab_free)) {
      min_slab_order = max_order + 1;
   screen->pb.min_alloc_size = 1 << screen->pb.bo_slabs[0].min_order;
zink_bo_deinit(struct zink_screen *screen)
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (screen->pb.bo_slabs[i].groups)
         pb_slabs_deinit(&screen->pb.bo_slabs[i]);
   pb_cache_deinit(&screen->pb.bo_cache);