From 08d162f0b57b5bb68b5a239ef2b3d3456752e0f2 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 23 Mar 2021 14:43:28 +0000 Subject: [PATCH] radv: expose 2/3rd of total memory as VRAM and 1/3rd as GTT on APUs A bunch of games complain when the VRAM size is too small. The most compatible solution seems to return memory heaps like a dGPU. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3423 Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_device.c | 116 +++++++++++++++++++++++++++++++------------ 1 file changed, 84 insertions(+), 32 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 2fdd64d..158cf49 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -166,10 +166,23 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device) { uint64_t visible_vram_size = radv_get_visible_vram_size(device); uint64_t vram_size = radv_get_vram_size(device); + uint64_t gtt_size = device->rad_info.gart_size; int vram_index = -1, visible_vram_index = -1, gart_index = -1; + device->memory_properties.memoryHeapCount = 0; device->heaps = 0; + if (!device->rad_info.has_dedicated_vram) { + /* On APUs, the carveout is usually too small for games that request a minimum VRAM size + * greater than it. To workaround this, we compute the total available memory size (GTT + + * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT. + */ + const uint64_t total_size = gtt_size + visible_vram_size; + visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size); + gtt_size = total_size - visible_vram_size; + vram_size = 0; + } + /* Only get a VRAM heap if it is significant, not if it is a 16 MiB * remainder above visible VRAM. */ if (vram_size > 0 && vram_size * 9 >= visible_vram_size) { @@ -181,11 +194,11 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device) }; } - if (device->rad_info.gart_size > 0) { + if (gtt_size > 0) { gart_index = device->memory_properties.memoryHeapCount++; device->heaps |= RADV_HEAP_GTT; device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){ - .size = device->rad_info.gart_size, + .size = gtt_size, .flags = 0, }; } @@ -2402,39 +2415,78 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, * Note that the application heap usages are not really accurate (eg. * in presence of shared buffers). */ - unsigned mask = device->heaps; - unsigned heap = 0; - while (mask) { - uint64_t internal_usage = 0, system_usage = 0; - unsigned type = 1u << u_bit_scan(&mask); - - switch (type) { - case RADV_HEAP_VRAM: - internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); - system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE); - break; - case RADV_HEAP_VRAM_VIS: - internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS); - if (!(device->heaps & RADV_HEAP_VRAM)) - internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); - system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE); - break; - case RADV_HEAP_GTT: - internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT); - system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE); - break; - } + if (!device->rad_info.has_dedicated_vram) { + /* On APUs, the driver exposes fake heaps to the application because usually the carveout is + * too small for games but the budgets need to be redistributed accordingly. + */ - uint64_t total_usage = MAX2(internal_usage, system_usage); + /* Get the visible VRAM/GTT heap sizes and internal usages. */ + uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[RADV_HEAP_VRAM_VIS].size; + uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[RADV_HEAP_GTT].size; + uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) + + device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); + uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT); - uint64_t free_space = device->memory_properties.memoryHeaps[heap].size - - MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage); - memoryBudget->heapBudget[heap] = free_space + internal_usage; - memoryBudget->heapUsage[heap] = internal_usage; - ++heap; - } + /* Compute the total heap size, internal and system usage. */ + uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size; + uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage; + uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + + device->ws->query_value(device->ws, RADEON_GTT_USAGE); + + uint64_t total_usage = MAX2(total_internal_usage, total_system_usage); + + /* Compute the total free space that can be allocated for this process accross all heaps. */ + uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage); - assert(heap == memory_properties->memoryHeapCount); + /* Compute the remaining visible VRAM size for this process. */ + uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage); + + /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap sizes, + * and align down to the page size to be conservative. + */ + vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), + device->rad_info.gart_page_size); + uint64_t gtt_free_space = total_free_space - vram_vis_free_space; + + memoryBudget->heapBudget[RADV_HEAP_VRAM_VIS] = vram_vis_free_space + vram_vis_internal_usage; + memoryBudget->heapUsage[RADV_HEAP_VRAM_VIS] = vram_vis_internal_usage; + memoryBudget->heapBudget[RADV_HEAP_GTT] = gtt_free_space + gtt_internal_usage; + memoryBudget->heapUsage[RADV_HEAP_GTT] = gtt_internal_usage; + } else { + unsigned mask = device->heaps; + unsigned heap = 0; + while (mask) { + uint64_t internal_usage = 0, system_usage = 0; + unsigned type = 1u << u_bit_scan(&mask); + + switch (type) { + case RADV_HEAP_VRAM: + internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); + system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE); + break; + case RADV_HEAP_VRAM_VIS: + internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS); + if (!(device->heaps & RADV_HEAP_VRAM)) + internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); + system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE); + break; + case RADV_HEAP_GTT: + internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT); + system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE); + break; + } + + uint64_t total_usage = MAX2(internal_usage, system_usage); + + uint64_t free_space = device->memory_properties.memoryHeaps[heap].size - + MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage); + memoryBudget->heapBudget[heap] = free_space + internal_usage; + memoryBudget->heapUsage[heap] = internal_usage; + ++heap; + } + + assert(heap == memory_properties->memoryHeapCount); + } /* The heapBudget and heapUsage values must be zero for array elements * greater than or equal to -- 2.7.4