From fbd32a04daf84f7ed931a4a2510f2181083753cf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 18 Jan 2023 09:01:15 -0700 Subject: [PATCH] anv: add a third memory type for LLC configuration Commit 582bf4d9 turned on write-combining for most (all?) memory allocations. This caused a fairly large performance drop in some of our VMware tests (application traces, such as Windows Metro Paint). This patch adds a third memory type configuration: DEVICE_LOCAL, HOST_VISIBLE, HOST_COHERENT. This is uncached. Then, in anv_AllocateMemory() we only use write-combining for this uncached type. This memory type is found in the Intel Windows Vulkan driver. And according to https://asawicki.info/news_1740_vulkan_memory_types_on_pc_and_how_to_use_them uncached memory correlates to write-combined memory. This fixes our performance regression (and actually produced the fastest ever results for our test suite). Signed-off-by: Brian Paul Part-of: --- src/intel/vulkan/anv_device.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 837f683..fad34e0 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -514,19 +514,28 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd) * But some game engines can't handle single type well * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438 * - * And Intel on Windows uses 3 types so it's better to add extra one here + * The second memory type w/out HOST_CACHED_BIT will get write-combining. + * See anv_AllocateMemory(). + * + * The Intel Vulkan driver for Windows also advertises these memory types. 
*/ - device->memory.type_count = 2; + device->memory.type_count = 3; device->memory.types[0] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - .heapIndex = 0, + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .heapIndex = 0, }; device->memory.types[1] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + device->memory.types[2] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, }; } else { device->memory.heap_count = 1; @@ -3819,7 +3828,7 @@ VkResult anv_AllocateMemory( return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count); - struct anv_memory_type *mem_type = + const struct anv_memory_type *mem_type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex]; assert(mem_type->heapIndex < pdevice->memory.heap_count); struct anv_memory_heap *mem_heap = @@ -3914,9 +3923,10 @@ VkResult anv_AllocateMemory( alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM; /* If the allocated buffer might end up in local memory and it's host - * visible, make CPU writes are combined, it should be faster. + * visible and uncached, enable CPU write-combining. It should be faster. */ if (!(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) && + (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) == 0 && (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) alloc_flags |= ANV_BO_ALLOC_WRITE_COMBINE; -- 2.7.4