}
}
-static void *
-radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant *shader)
+static unsigned
+get_size_class(unsigned size, bool round_up)
{
- mtx_lock(&device->shader_slab_mutex);
- list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
- {
- uint64_t offset = 0;
+ size = round_up ? util_logbase2_ceil(size) : util_logbase2(size);
+ unsigned size_class =
+ MAX2(size, RADV_SHADER_ALLOC_MIN_SIZE_CLASS) - RADV_SHADER_ALLOC_MIN_SIZE_CLASS;
+ return MIN2(size_class, RADV_SHADER_ALLOC_NUM_FREE_LISTS - 1);
+}
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#endif
- list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list)
+static void
+remove_hole(struct radv_device *device, union radv_shader_arena_block *hole)
+{
+ unsigned size_class = get_size_class(hole->size, false);
+ list_del(&hole->freelist);
+ if (list_is_empty(&device->shader_free_lists[size_class]))
+ device->shader_free_list_mask &= ~(1u << size_class);
+}
+
+static void
+add_hole(struct radv_device *device, union radv_shader_arena_block *hole)
+{
+ unsigned size_class = get_size_class(hole->size, false);
+ list_addtail(&hole->freelist, &device->shader_free_lists[size_class]);
+ device->shader_free_list_mask |= 1u << size_class;
+}
+
+static union radv_shader_arena_block *
+alloc_block_obj(struct radv_device *device)
+{
+ if (!list_is_empty(&device->shader_block_obj_pool)) {
+ union radv_shader_arena_block *block =
+ list_first_entry(&device->shader_block_obj_pool, union radv_shader_arena_block, pool);
+ list_del(&block->pool);
+ return block;
+ }
+
+ return malloc(sizeof(union radv_shader_arena_block));
+}
+
+static void
+free_block_obj(struct radv_device *device, union radv_shader_arena_block *block)
+{
+ list_add(&block->pool, &device->shader_block_obj_pool);
+}
+
+/* Segregated fit allocator, implementing a good-fit allocation policy.
+ *
+ * This is a variation of sequential fit allocation with several lists of free blocks ("holes")
+ * instead of one. Each list of holes only contains holes of a certain range of sizes, so holes that
+ * are too small can easily be ignored while allocating. Because this also ignores holes that are
+ * larger than necessary (approximating best-fit allocation), this could be described as a
+ * "good-fit" allocator.
+ *
+ * Typically, shaders are allocated and only free'd when the device is destroyed. For this pattern,
+ * this should allocate blocks for shaders fast and with no fragmentation, while still allowing
+ * free'd memory to be re-used.
+ */
+static union radv_shader_arena_block *
+alloc_shader_memory(struct radv_device *device, uint32_t size, void *ptr)
+{
+ size = align(size, RADV_SHADER_ALLOC_ALIGNMENT);
+
+ mtx_lock(&device->shader_arena_mutex);
+
+ /* Try to use an existing hole. Unless the shader is very large, this should only have to look
+ * at the first one available.
+ */
+ unsigned free_list_mask = BITFIELD_MASK(RADV_SHADER_ALLOC_NUM_FREE_LISTS);
+ unsigned size_class =
+ ffs(device->shader_free_list_mask & (free_list_mask << get_size_class(size, true)));
+ if (size_class) {
+ size_class--;
+
+ list_for_each_entry(union radv_shader_arena_block, hole,
+ &device->shader_free_lists[size_class], freelist)
{
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
- if (s->bo_offset - offset >= shader->code_size) {
- shader->bo = slab->bo;
- shader->bo_offset = offset;
- list_addtail(&shader->slab_list, &s->slab_list);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr + offset;
+ if (hole->size < size)
+ continue;
+
+ assert(hole->offset % RADV_SHADER_ALLOC_ALIGNMENT == 0);
+
+ if (size == hole->size) {
+ remove_hole(device, hole);
+ hole->freelist.next = ptr;
+ mtx_unlock(&device->shader_arena_mutex);
+ return hole;
+ } else {
+ union radv_shader_arena_block *alloc = alloc_block_obj(device);
+ if (!alloc) {
+ mtx_unlock(&device->shader_arena_mutex);
+ return NULL;
+ }
+ list_addtail(&alloc->list, &hole->list);
+ alloc->freelist.prev = NULL;
+ alloc->freelist.next = ptr;
+ alloc->arena = hole->arena;
+ alloc->offset = hole->offset;
+ alloc->size = size;
+
+ remove_hole(device, hole);
+ hole->offset += size;
+ hole->size -= size;
+ add_hole(device, hole);
+
+ mtx_unlock(&device->shader_arena_mutex);
+ return alloc;
}
- offset = align_u64(s->bo_offset + s->code_size, 256);
- }
- if (offset <= slab->size && slab->size - offset >= shader->code_size) {
- shader->bo = slab->bo;
- shader->bo_offset = offset;
- list_addtail(&shader->slab_list, &slab->shaders);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr + offset;
}
}
- mtx_unlock(&device->shader_slab_mutex);
- struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
+ /* Allocate a new shader arena. */
+ struct radv_shader_arena *arena = calloc(1, sizeof(struct radv_shader_arena));
+ union radv_shader_arena_block *alloc = NULL, *hole = NULL;
+ if (!arena)
+ goto fail;
- slab->size = MAX2(256 * 1024, shader->code_size);
+ unsigned arena_size = MAX2(RADV_SHADER_ALLOC_MIN_ARENA_SIZE, size);
VkResult result = device->ws->buffer_create(
- device->ws, slab->size, 256, RADEON_DOMAIN_VRAM,
+ device->ws, arena_size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_VRAM,
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT |
(device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
: RADEON_FLAG_READ_ONLY),
- RADV_BO_PRIORITY_SHADER, 0, &slab->bo);
- if (result != VK_SUCCESS) {
- free(slab);
- return NULL;
+ RADV_BO_PRIORITY_SHADER, 0, &arena->bo);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ list_inithead(&arena->entries);
+
+ arena->ptr = (char *)device->ws->buffer_map(arena->bo);
+ if (!arena->ptr)
+ goto fail;
+
+ alloc = alloc_block_obj(device);
+ hole = arena_size - size > 0 ? alloc_block_obj(device) : alloc;
+ if (!alloc || !hole)
+ goto fail;
+ list_addtail(&alloc->list, &arena->entries);
+ alloc->freelist.prev = NULL;
+ alloc->freelist.next = ptr;
+ alloc->arena = arena;
+ alloc->offset = 0;
+ alloc->size = size;
+
+ if (hole != alloc) {
+ hole->arena = arena;
+ hole->offset = size;
+ hole->size = arena_size - size;
+
+ list_addtail(&hole->list, &arena->entries);
+ add_hole(device, hole);
}
- slab->ptr = (char *)device->ws->buffer_map(slab->bo);
- if (!slab->ptr) {
- device->ws->buffer_destroy(device->ws, slab->bo);
- free(slab);
+ list_addtail(&arena->list, &device->shader_arenas);
+
+ mtx_unlock(&device->shader_arena_mutex);
+ return alloc;
+
+fail:
+ mtx_unlock(&device->shader_arena_mutex);
+ free(alloc);
+ free(hole);
+ if (arena && arena->bo)
+ device->ws->buffer_destroy(device->ws, arena->bo);
+ free(arena);
+ return NULL;
+}
+
+static union radv_shader_arena_block *
+get_hole(struct radv_shader_arena *arena, struct list_head *head)
+{
+ if (head == &arena->entries)
return NULL;
+
+ union radv_shader_arena_block *hole = LIST_ENTRY(union radv_shader_arena_block, head, list);
+ return hole->freelist.prev ? hole : NULL;
+}
+
+static void
+free_shader_memory(struct radv_device *device, union radv_shader_arena_block *alloc)
+{
+ mtx_lock(&device->shader_arena_mutex);
+
+ union radv_shader_arena_block *hole_prev = get_hole(alloc->arena, alloc->list.prev);
+ union radv_shader_arena_block *hole_next = get_hole(alloc->arena, alloc->list.next);
+
+ union radv_shader_arena_block *hole = alloc;
+
+ /* merge with previous hole */
+ if (hole_prev) {
+ remove_hole(device, hole_prev);
+
+ hole_prev->size += hole->size;
+ list_del(&hole->list);
+ free_block_obj(device, hole);
+
+ hole = hole_prev;
+ }
+
+ /* merge with next hole */
+ if (hole_next) {
+ remove_hole(device, hole_next);
+
+ hole_next->offset -= hole->size;
+ hole_next->size += hole->size;
+ list_del(&hole->list);
+ free_block_obj(device, hole);
+
+ hole = hole_next;
}
- list_inithead(&slab->shaders);
+ if (list_is_singular(&hole->list)) {
+ struct radv_shader_arena *arena = hole->arena;
+ free_block_obj(device, hole);
+
+ device->ws->buffer_destroy(device->ws, arena->bo);
+ list_del(&arena->list);
+ free(arena);
+ } else {
+ add_hole(device, hole);
+ }
+
+ mtx_unlock(&device->shader_arena_mutex);
+}
+
+static void *
+radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant *shader)
+{
+ shader->alloc = alloc_shader_memory(device, shader->code_size, shader);
+ if (!shader->alloc)
+ return NULL;
+ shader->bo = shader->alloc->arena->bo;
+ return shader->alloc->arena->ptr + shader->alloc->offset;
+}
- mtx_lock(&device->shader_slab_mutex);
- list_add(&slab->slabs, &device->shader_slabs);
+void
+radv_init_shader_arenas(struct radv_device *device)
+{
+ mtx_init(&device->shader_arena_mutex, mtx_plain);
+
+ device->shader_free_list_mask = 0;
- shader->bo = slab->bo;
- shader->bo_offset = 0;
- list_add(&shader->slab_list, &slab->shaders);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr;
+ list_inithead(&device->shader_arenas);
+ list_inithead(&device->shader_block_obj_pool);
+ for (unsigned i = 0; i < RADV_SHADER_ALLOC_NUM_FREE_LISTS; i++)
+ list_inithead(&device->shader_free_lists[i]);
}
void
-radv_destroy_shader_slabs(struct radv_device *device)
+radv_destroy_shader_arenas(struct radv_device *device)
{
- list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ list_for_each_entry_safe(union radv_shader_arena_block, block, &device->shader_block_obj_pool,
+ pool) free(block);
+
+ list_for_each_entry_safe(struct radv_shader_arena, arena, &device->shader_arenas, list)
{
- device->ws->buffer_destroy(device->ws, slab->bo);
- free(slab);
+ device->ws->buffer_destroy(device->ws, arena->bo);
+ free(arena);
}
- mtx_destroy(&device->shader_slab_mutex);
+ mtx_destroy(&device->shader_arena_mutex);
}
/* For the UMR disassembler. */
if (!p_atomic_dec_zero(&variant->ref_count))
return;
- mtx_lock(&device->shader_slab_mutex);
- list_del(&variant->slab_list);
- mtx_unlock(&device->shader_slab_mutex);
+ free_shader_memory(device, variant->alloc);
free(variant->spirv);
free(variant->nir_string);
uint64_t
radv_shader_variant_get_va(const struct radv_shader_variant *variant)
{
- return radv_buffer_get_va(variant->bo) + variant->bo_offset;
+ return radv_buffer_get_va(variant->bo) + variant->alloc->offset;
}
struct radv_shader_variant *
radv_find_shader_variant(struct radv_device *device, uint64_t pc)
{
- mtx_lock(&device->shader_slab_mutex);
-
- list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ mtx_lock(&device->shader_arena_mutex);
+ list_for_each_entry(struct radv_shader_arena, arena, &device->shader_arenas, list)
{
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
- list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list)
+ list_for_each_entry(union radv_shader_arena_block, block, &arena->entries, list)
{
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
- uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);
- uint64_t va = radv_buffer_get_va(s->bo);
-
- if (pc >= va + s->bo_offset && pc < va + offset) {
- mtx_unlock(&device->shader_slab_mutex);
- return s;
+ uint64_t start = radv_buffer_get_va(block->arena->bo) + block->offset;
+ if (!block->freelist.prev && pc >= start && pc < start + block->size) {
+ mtx_unlock(&device->shader_arena_mutex);
+ return (struct radv_shader_variant *)block->freelist.next;
}
}
}
- mtx_unlock(&device->shader_slab_mutex);
+ mtx_unlock(&device->shader_arena_mutex);
return NULL;
}