From: Bas Nieuwenhuizen Date: Mon, 23 Nov 2020 02:13:18 +0000 (+0100) Subject: radv/winsys: Add support for a fixed VA address for replay. X-Git-Tag: upstream/21.2.3~957 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f9cc94af7b77e648837a388567b95cdc302398ce;p=platform%2Fupstream%2Fmesa.git radv/winsys: Add support for a fixed VA address for replay. Reviewed-by: Samuel Pitoiset Part-of: --- diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 11818b8..20e51f2 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -522,7 +522,7 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t m device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_UPLOAD_BUFFER, &bo); + RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo); if (result != VK_SUCCESS) { cmd_buffer->record_result = result; diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index d1c2ef0..160c5e9 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -70,7 +70,7 @@ radv_init_trace(struct radv_device *device) result = ws->buffer_create( ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, - RADV_BO_PRIORITY_UPLOAD_BUFFER, &device->trace_bo); + RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo); if (result != VK_SUCCESS) return false; @@ -839,7 +839,7 @@ radv_trap_handler_init(struct radv_device *device) result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT, - RADV_BO_PRIORITY_SCRATCH, &device->tma_bo); + RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo); if (result != VK_SUCCESS) return false; diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index efe58fc..1ea9ce6 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -804,7 +804,7 @@ radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pC VkResult result = device->ws->buffer_create( device->ws, bo_size, 32, RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT, - RADV_BO_PRIORITY_DESCRIPTOR, &pool->bo); + RADV_BO_PRIORITY_DESCRIPTOR, 0, &pool->bo); if (result != VK_SUCCESS) { radv_destroy_descriptor_pool(device, pAllocator, pool); return vk_error(device->instance, result); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index a6b2f0e..a796ca1 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2761,7 +2761,7 @@ radv_device_init_border_color(struct radv_device *device) result = device->ws->buffer_create( device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_SHADER, &device->border_color_data.bo); + RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo); if (result != VK_SUCCESS) return vk_error(device->physical_device->instance, result); @@ -3824,7 +3824,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, if (scratch_size > queue_scratch_size) { result = queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &scratch_bo); + ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo); if (result != VK_SUCCESS) goto fail; } else @@ -3836,7 +3836,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, if (compute_scratch_size > compute_queue_scratch_size) { result = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, &compute_scratch_bo); + RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo); if (result != VK_SUCCESS) goto fail; @@ -3846,7 +3846,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, if (esgs_ring_size > queue->esgs_ring_size) { result = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, &esgs_ring_bo); + RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo); if (result != VK_SUCCESS) goto fail; } else { @@ -3857,7 +3857,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, if (gsvs_ring_size > queue->gsvs_ring_size) { result = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, &gsvs_ring_bo); + RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo); if (result != VK_SUCCESS) goto fail; } else { @@ -3868,7 +3868,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, if (add_tess_rings) { result = queue->device->ws->buffer_create( queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256, - RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &tess_rings_bo); + RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo); if (result != VK_SUCCESS) goto fail; } else { @@ -3881,8 +3881,9 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, /* 4 streamout GDS counters. * We need 256B (64 dw) of GDS, otherwise streamout hangs. */ - result = queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS, - ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &gds_bo); + result = + queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS, + ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo); if (result != VK_SUCCESS) goto fail; } else { @@ -3894,7 +3895,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, result = queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags, - RADV_BO_PRIORITY_SCRATCH, &gds_oa_bo); + RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo); if (result != VK_SUCCESS) goto fail; } else { @@ -3915,7 +3916,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave, result = queue->device->ws->buffer_create( queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY, - RADV_BO_PRIORITY_DESCRIPTOR, &descriptor_bo); + RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo); if (result != VK_SUCCESS) goto fail; } else @@ -5384,7 +5385,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc result = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, domain, - flags, priority, &mem->bo); + flags, priority, 0, &mem->bo); if (result != VK_SUCCESS) { if (device->overallocation_disallowed) { @@ -6283,7 +6284,7 @@ radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo, VkResult result = device->ws->buffer_create( device->ws, 8, 8, RADEON_DOMAIN_GTT, RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_FENCE, &event->bo); + RADV_BO_PRIORITY_FENCE, 0, &event->bo); if (result != VK_SUCCESS) { radv_destroy_event(device, pAllocator, event); return vk_error(device->instance, result); @@ -6386,7 +6387,7 @@ radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo, if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) { VkResult result = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0, - RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, &buffer->bo); + RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &buffer->bo); if (result != VK_SUCCESS) { radv_destroy_buffer(device, pAllocator, buffer); return vk_error(device->instance, result); diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 6f88bcf..75cbb73 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -1700,8 +1700,9 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ image->size = align64(image->size, image->alignment); image->offset = 0; - result = device->ws->buffer_create(device->ws, image->size, image->alignment, 0, - RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, &image->bo); + result = + device->ws->buffer_create(device->ws, image->size, image->alignment, 0, + RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo); if (result != VK_SUCCESS) { radv_destroy_image(device, alloc, image); return vk_error(device->instance, result); diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 26d3a54..b0b8453 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -978,7 +978,7 @@ radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo, VkResult result = device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_QUERY_POOL, &pool->bo); + RADV_BO_PRIORITY_QUERY_POOL, 0, &pool->bo); if (result != VK_SUCCESS) { radv_destroy_query_pool(device, pAllocator, pool); return vk_error(device->instance, result); diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index b0771c7..2a50556 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -224,7 +224,7 @@ struct radeon_winsys { VkResult (*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment, enum radeon_bo_domain domain, enum radeon_bo_flag flags, - unsigned priority, struct radeon_winsys_bo **out_bo); + unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo); void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo); void *(*buffer_map)(struct radeon_winsys_bo *bo); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index f848abf..eda2cdf 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -923,7 +923,7 @@ radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant RADEON_FLAG_NO_INTERPROCESS_SHARING | (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY), - RADV_BO_PRIORITY_SHADER, &slab->bo); + RADV_BO_PRIORITY_SHADER, 0, &slab->bo); if (result != VK_SUCCESS) { free(slab); return NULL; diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 6f16261..c23ca4b 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -385,7 +385,7 @@ radv_thread_trace_init_bo(struct radv_device *device) VkResult result = ws->buffer_create( ws, size, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, - RADV_BO_PRIORITY_SCRATCH, &bo); + RADV_BO_PRIORITY_SCRATCH, 0, &bo); device->thread_trace.bo = bo; if (result != VK_SUCCESS) return false; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index ecda58d..433ab50 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -632,7 +632,7 @@ cik_create_gfx_config(struct radv_device *device) device->ws->buffer_create(device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_CS, &device->gfx_init); + RADV_BO_PRIORITY_CS, 0, &device->gfx_init); if (result != VK_SUCCESS) goto fail; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index dcaada3..baec906 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -394,7 +394,8 @@ radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo static VkResult radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment, enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, - unsigned priority, struct radeon_winsys_bo **out_bo) + unsigned priority, uint64_t replay_address, + struct radeon_winsys_bo **out_bo) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct radv_amdgpu_winsys_bo *bo; @@ -420,10 +421,11 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size); r = amdgpu_va_range_alloc( - ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, 0, &va, &va_handle, + ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va, &va_handle, (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH); if (r) { - result = VK_ERROR_OUT_OF_DEVICE_MEMORY; + result = + replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; goto error_va_alloc; } diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index dc20675..e26a589 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -213,7 +213,7 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws, enum ring_type ring_type) ws->buffer_create(ws, ib_size, 0, radv_amdgpu_cs_domain(ws), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_CS, &cs->ib_buffer); + RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer); if (result != VK_SUCCESS) { free(cs); return NULL; @@ -334,7 +334,7 @@ radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size) cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0, radv_amdgpu_cs_domain(&cs->ws->base), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_CS, &cs->ib_buffer); + RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer); if (result != VK_SUCCESS) { cs->base.cdw = 0; @@ -1036,7 +1036,7 @@ radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_id ws->buffer_create( ws, 4 * size, 4096, radv_amdgpu_cs_domain(ws), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY, - RADV_BO_PRIORITY_CS, &bos[j]); + RADV_BO_PRIORITY_CS, 0, &bos[j]); ptr = ws->buffer_map(bos[j]); if (needs_preamble) { @@ -1079,7 +1079,7 @@ radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_id ws->buffer_create( ws, 4 * size, 4096, radv_amdgpu_cs_domain(ws), RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY, - RADV_BO_PRIORITY_CS, &bos[0]); + RADV_BO_PRIORITY_CS, 0, &bos[0]); ptr = ws->buffer_map(bos[0]); if (preamble_cs) { @@ -1264,7 +1264,7 @@ radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority prior assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096); result = ws->base.buffer_create(&ws->base, 4096, 8, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, - RADV_BO_PRIORITY_CS, &ctx->fence_bo); + RADV_BO_PRIORITY_CS, 0, &ctx->fence_bo); if (result != VK_SUCCESS) { goto fail_alloc; } diff --git a/src/amd/vulkan/winsys/null/radv_null_bo.c b/src/amd/vulkan/winsys/null/radv_null_bo.c index f177579..496b496 100644 --- a/src/amd/vulkan/winsys/null/radv_null_bo.c +++ b/src/amd/vulkan/winsys/null/radv_null_bo.c @@ -31,7 +31,7 @@ static VkResult radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment, enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, - unsigned priority, struct radeon_winsys_bo **out_bo) + unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo) { struct radv_null_winsys_bo *bo;