radv/winsys: Add support for a fixed VA address for replay.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 23 Nov 2020 02:13:18 +0000 (03:13 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Mon, 5 Jul 2021 17:27:51 +0000 (17:27 +0000)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10570>

13 files changed:
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_debug.c
src/amd/vulkan/radv_descriptor_set.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_image.c
src/amd/vulkan/radv_query.c
src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_sqtt.c
src/amd/vulkan/si_cmd_buffer.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
src/amd/vulkan/winsys/null/radv_null_bo.c

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 11818b8..20e51f2 100644
@@ -522,7 +522,7 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t m
       device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws),
                                 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                    RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
-                                RADV_BO_PRIORITY_UPLOAD_BUFFER, &bo);
+                                RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo);
 
    if (result != VK_SUCCESS) {
       cmd_buffer->record_result = result;
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index d1c2ef0..160c5e9 100644
@@ -70,7 +70,7 @@ radv_init_trace(struct radv_device *device)
    result = ws->buffer_create(
       ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
-      RADV_BO_PRIORITY_UPLOAD_BUFFER, &device->trace_bo);
+      RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
    if (result != VK_SUCCESS)
       return false;
 
@@ -839,7 +839,7 @@ radv_trap_handler_init(struct radv_device *device)
    result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
                               RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                  RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
-                              RADV_BO_PRIORITY_SCRATCH, &device->tma_bo);
+                              RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
    if (result != VK_SUCCESS)
       return false;
 
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index efe58fc..1ea9ce6 100644
@@ -804,7 +804,7 @@ radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pC
          VkResult result = device->ws->buffer_create(
             device->ws, bo_size, 32, RADEON_DOMAIN_VRAM,
             RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT,
-            RADV_BO_PRIORITY_DESCRIPTOR, &pool->bo);
+            RADV_BO_PRIORITY_DESCRIPTOR, 0, &pool->bo);
          if (result != VK_SUCCESS) {
             radv_destroy_descriptor_pool(device, pAllocator, pool);
             return vk_error(device->instance, result);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index a6b2f0e..a796ca1 100644
@@ -2761,7 +2761,7 @@ radv_device_init_border_color(struct radv_device *device)
    result = device->ws->buffer_create(
       device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
-      RADV_BO_PRIORITY_SHADER, &device->border_color_data.bo);
+      RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
 
    if (result != VK_SUCCESS)
       return vk_error(device->physical_device->instance, result);
@@ -3824,7 +3824,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
    if (scratch_size > queue_scratch_size) {
       result =
          queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,
-                                          ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &scratch_bo);
+                                          ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
       if (result != VK_SUCCESS)
          goto fail;
    } else
@@ -3836,7 +3836,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
    if (compute_scratch_size > compute_queue_scratch_size) {
       result = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size, 4096,
                                                 RADEON_DOMAIN_VRAM, ring_bo_flags,
-                                                RADV_BO_PRIORITY_SCRATCH, &compute_scratch_bo);
+                                                RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);
       if (result != VK_SUCCESS)
          goto fail;
 
@@ -3846,7 +3846,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
    if (esgs_ring_size > queue->esgs_ring_size) {
       result = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,
                                                 RADEON_DOMAIN_VRAM, ring_bo_flags,
-                                                RADV_BO_PRIORITY_SCRATCH, &esgs_ring_bo);
+                                                RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);
       if (result != VK_SUCCESS)
          goto fail;
    } else {
@@ -3857,7 +3857,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
    if (gsvs_ring_size > queue->gsvs_ring_size) {
       result = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,
                                                 RADEON_DOMAIN_VRAM, ring_bo_flags,
-                                                RADV_BO_PRIORITY_SCRATCH, &gsvs_ring_bo);
+                                                RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);
       if (result != VK_SUCCESS)
          goto fail;
    } else {
@@ -3868,7 +3868,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
    if (add_tess_rings) {
       result = queue->device->ws->buffer_create(
          queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,
-         RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &tess_rings_bo);
+         RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
       if (result != VK_SUCCESS)
          goto fail;
    } else {
@@ -3881,8 +3881,9 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
       /* 4 streamout GDS counters.
        * We need 256B (64 dw) of GDS, otherwise streamout hangs.
        */
-      result = queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
-                                                ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &gds_bo);
+      result =
+         queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
+                                          ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
       if (result != VK_SUCCESS)
          goto fail;
    } else {
@@ -3894,7 +3895,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
 
       result =
          queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
-                                          RADV_BO_PRIORITY_SCRATCH, &gds_oa_bo);
+                                          RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
       if (result != VK_SUCCESS)
          goto fail;
    } else {
@@ -3915,7 +3916,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
       result = queue->device->ws->buffer_create(
          queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,
          RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
-         RADV_BO_PRIORITY_DESCRIPTOR, &descriptor_bo);
+         RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
       if (result != VK_SUCCESS)
          goto fail;
    } else
@@ -5384,7 +5385,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
 
       result = device->ws->buffer_create(device->ws, alloc_size,
                                          device->physical_device->rad_info.max_alignment, domain,
-                                         flags, priority, &mem->bo);
+                                         flags, priority, 0, &mem->bo);
 
       if (result != VK_SUCCESS) {
          if (device->overallocation_disallowed) {
@@ -6283,7 +6284,7 @@ radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
    VkResult result = device->ws->buffer_create(
       device->ws, 8, 8, RADEON_DOMAIN_GTT,
       RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
-      RADV_BO_PRIORITY_FENCE, &event->bo);
+      RADV_BO_PRIORITY_FENCE, 0, &event->bo);
    if (result != VK_SUCCESS) {
       radv_destroy_event(device, pAllocator, event);
       return vk_error(device->instance, result);
@@ -6386,7 +6387,7 @@ radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
    if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
       VkResult result =
          device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,
-                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, &buffer->bo);
+                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &buffer->bo);
       if (result != VK_SUCCESS) {
          radv_destroy_buffer(device, pAllocator, buffer);
          return vk_error(device->instance, result);
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 6f88bcf..75cbb73 100644
@@ -1700,8 +1700,9 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
       image->size = align64(image->size, image->alignment);
       image->offset = 0;
 
-      result = device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
-                                         RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, &image->bo);
+      result =
+         device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
+                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
       if (result != VK_SUCCESS) {
          radv_destroy_image(device, alloc, image);
          return vk_error(device->instance, result);
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 26d3a54..b0b8453 100644
@@ -978,7 +978,7 @@ radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo,
 
    VkResult result = device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT,
                                                RADEON_FLAG_NO_INTERPROCESS_SHARING,
-                                               RADV_BO_PRIORITY_QUERY_POOL, &pool->bo);
+                                               RADV_BO_PRIORITY_QUERY_POOL, 0, &pool->bo);
    if (result != VK_SUCCESS) {
       radv_destroy_query_pool(device, pAllocator, pool);
       return vk_error(device->instance, result);
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index b0771c7..2a50556 100644
@@ -224,7 +224,7 @@ struct radeon_winsys {
 
    VkResult (*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment,
                              enum radeon_bo_domain domain, enum radeon_bo_flag flags,
-                             unsigned priority, struct radeon_winsys_bo **out_bo);
+                             unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo);
 
    void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo);
    void *(*buffer_map)(struct radeon_winsys_bo *bo);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index f848abf..eda2cdf 100644
@@ -923,7 +923,7 @@ radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant
       RADEON_FLAG_NO_INTERPROCESS_SHARING |
          (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
                                                                          : RADEON_FLAG_READ_ONLY),
-      RADV_BO_PRIORITY_SHADER, &slab->bo);
+      RADV_BO_PRIORITY_SHADER, 0, &slab->bo);
    if (result != VK_SUCCESS) {
       free(slab);
       return NULL;
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index 6f16261..c23ca4b 100644
@@ -385,7 +385,7 @@ radv_thread_trace_init_bo(struct radv_device *device)
    VkResult result = ws->buffer_create(
       ws, size, 4096, RADEON_DOMAIN_VRAM,
       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
-      RADV_BO_PRIORITY_SCRATCH, &bo);
+      RADV_BO_PRIORITY_SCRATCH, 0, &bo);
    device->thread_trace.bo = bo;
    if (result != VK_SUCCESS)
       return false;
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index ecda58d..433ab50 100644
@@ -632,7 +632,7 @@ cik_create_gfx_config(struct radv_device *device)
       device->ws->buffer_create(device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws),
                                 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                    RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
-                                RADV_BO_PRIORITY_CS, &device->gfx_init);
+                                RADV_BO_PRIORITY_CS, 0, &device->gfx_init);
    if (result != VK_SUCCESS)
       goto fail;
 
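diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index dcaada3..baec906 100644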
@@ -394,7 +394,8 @@ radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo
 static VkResult
 radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
                              enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
-                             unsigned priority, struct radeon_winsys_bo **out_bo)
+                             unsigned priority, uint64_t replay_address,
+                             struct radeon_winsys_bo **out_bo)
 {
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_winsys_bo *bo;
@@ -420,10 +421,11 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned
       virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
 
    r = amdgpu_va_range_alloc(
-      ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, 0, &va, &va_handle,
+      ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va, &va_handle,
       (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH);
    if (r) {
-      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+      result =
+         replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
       goto error_va_alloc;
    }
 
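diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index dc20675..e26a589 100644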
@@ -213,7 +213,7 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws, enum ring_type ring_type)
          ws->buffer_create(ws, ib_size, 0, radv_amdgpu_cs_domain(ws),
                            RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
                               RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
-                           RADV_BO_PRIORITY_CS, &cs->ib_buffer);
+                           RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer);
       if (result != VK_SUCCESS) {
          free(cs);
          return NULL;
@@ -334,7 +334,7 @@ radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
       cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0, radv_amdgpu_cs_domain(&cs->ws->base),
                                  RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                     RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
-                                 RADV_BO_PRIORITY_CS, &cs->ib_buffer);
+                                 RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer);
 
    if (result != VK_SUCCESS) {
       cs->base.cdw = 0;
@@ -1036,7 +1036,7 @@ radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_id
             ws->buffer_create(
                ws, 4 * size, 4096, radv_amdgpu_cs_domain(ws),
                RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
-               RADV_BO_PRIORITY_CS, &bos[j]);
+               RADV_BO_PRIORITY_CS, 0, &bos[j]);
             ptr = ws->buffer_map(bos[j]);
 
             if (needs_preamble) {
@@ -1079,7 +1079,7 @@ radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_id
          ws->buffer_create(
             ws, 4 * size, 4096, radv_amdgpu_cs_domain(ws),
             RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
-            RADV_BO_PRIORITY_CS, &bos[0]);
+            RADV_BO_PRIORITY_CS, 0, &bos[0]);
          ptr = ws->buffer_map(bos[0]);
 
          if (preamble_cs) {
@@ -1264,7 +1264,7 @@ radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority prior
    assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
    result = ws->base.buffer_create(&ws->base, 4096, 8, RADEON_DOMAIN_GTT,
                                    RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
-                                   RADV_BO_PRIORITY_CS, &ctx->fence_bo);
+                                   RADV_BO_PRIORITY_CS, 0, &ctx->fence_bo);
    if (result != VK_SUCCESS) {
       goto fail_alloc;
    }
diff --git a/src/amd/vulkan/winsys/null/radv_null_bo.c b/src/amd/vulkan/winsys/null/radv_null_bo.c
index f177579..496b496 100644
@@ -31,7 +31,7 @@
 static VkResult
 radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
                            enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
-                           unsigned priority, struct radeon_winsys_bo **out_bo)
+                           unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo)
 {
    struct radv_null_winsys_bo *bo;