From 406c87b779d7abf4f23b99beb045f95a31a1e8e0 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Fri, 30 Apr 2021 14:22:15 +0200 Subject: [PATCH] radv: Support address capture and replay. Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_device.c | 42 ++++++++++++++++++++------- src/amd/vulkan/radv_radeon_winsys.h | 1 + src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 10 +++++-- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index a796ca1..6a74cc0 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1202,7 +1202,7 @@ radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice, f->separateDepthStencilLayouts = true; f->hostQueryReset = true; f->timelineSemaphore = true, f->bufferDeviceAddress = true; - f->bufferDeviceAddressCaptureReplay = false; + f->bufferDeviceAddressCaptureReplay = true; f->bufferDeviceAddressMultiDevice = false; f->vulkanMemoryModel = true; f->vulkanMemoryModelDeviceScope = true; @@ -1335,9 +1335,9 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: { VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext; - features->bufferDeviceAddress = true; - features->bufferDeviceAddressCaptureReplay = false; - features->bufferDeviceAddressMultiDevice = false; + CORE_FEATURE(1, 2, bufferDeviceAddress); + CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay); + CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: { @@ -5296,6 +5296,12 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc if (priority_ext) priority_float = priority_ext->priority; + uint64_t replay_address = 0; + const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info = + vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO); + if (replay_info && replay_info->opaqueCaptureAddress) + replay_address = replay_info->opaqueCaptureAddress; + unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX)); @@ -5369,6 +5375,10 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc } } + const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO); + if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT) + flags |= RADEON_FLAG_REPLAYABLE; + if (device->overallocation_disallowed) { uint64_t total_size = device->physical_device->memory_properties.memoryHeaps[heap_index].size; @@ -5385,7 +5395,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc result = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, domain, - flags, priority, 0, &mem->bo); + flags, priority, replay_address, &mem->bo); if (result != VK_SUCCESS) { if (device->overallocation_disallowed) { @@ -6385,9 +6395,19 @@ radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo, vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL; if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) { - VkResult result = - device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0, - RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &buffer->bo); + enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL; + if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT) + flags |= RADEON_FLAG_REPLAYABLE; + + uint64_t replay_address = 0; + const VkBufferOpaqueCaptureAddressCreateInfo *replay_info = + vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO); + if (replay_info && replay_info->opaqueCaptureAddress) + replay_address = replay_info->opaqueCaptureAddress; + + VkResult result = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0, + flags, RADV_BO_PRIORITY_VIRTUAL, + replay_address, &buffer->bo); if (result != VK_SUCCESS) { radv_destroy_buffer(device, pAllocator, buffer); return vk_error(device->instance, result); @@ -6421,14 +6441,16 @@ radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pI uint64_t radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo) { - return 0; + RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer); + return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0; } uint64_t radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo) { - return 0; + RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory); + return radv_buffer_get_va(mem->bo); } static inline unsigned diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 2a50556..9f12b60 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -63,6 +63,7 @@ enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_32BIT = (1 << 8), RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9), RADEON_FLAG_ZERO_VRAM = (1 << 10), + RADEON_FLAG_REPLAYABLE = (1 << 11), }; enum radeon_ctx_priority { diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index baec906..3bea2f3 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -420,9 +420,13 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned if (size >= ws->info.pte_fragment_size) virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size); - r = amdgpu_va_range_alloc( - ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va, &va_handle, - (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH); + assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE)); + + const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | + (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | + (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0); + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, + &va, &va_handle, va_flags); if (r) { result = replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; -- 2.7.4