From e259f4050d096014fe240f5d7f73d1b23b5f33b8 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Tue, 11 Apr 2023 15:04:02 +0800 Subject: [PATCH] radeonsi: limit CP DMA to skip holes in sparse bo CP DMA on gfx9 can't handle holes in a sparse buffer. The fix skips the holes in sparse BOs so that arb_sparse_buffer-buffer-data and arb_sparse_buffer-commit pass. Signed-off-by: Flora Cui Signed-off-by: Julia Zhang Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/ci/radeonsi-raven-fails.txt | 2 - .../drivers/radeonsi/ci/gfx9-raven-fail.csv | 2 - .../drivers/radeonsi/ci/gfx9-vega20-fail.csv | 2 - src/gallium/drivers/radeonsi/si_cp_dma.c | 41 ++++++++++++++++++ src/gallium/include/winsys/radeon_winsys.h | 7 ++++ src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 49 ++++++++++++++++++++++ 6 files changed, 97 insertions(+), 6 deletions(-) diff --git a/src/amd/ci/radeonsi-raven-fails.txt b/src/amd/ci/radeonsi-raven-fails.txt index 14691cc..25e0bd1 100644 --- a/src/amd/ci/radeonsi-raven-fails.txt +++ b/src/amd/ci/radeonsi-raven-fails.txt @@ -77,8 +77,6 @@ spec@arb_program_interface_query@arb_program_interface_query-getprogramresourcei spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail -spec@arb_sparse_buffer@buffer-data,Fail -spec@arb_sparse_buffer@commit,Fail spec@egl 1.4@eglterminate then unbind context,Fail spec@egl_chromium_sync_control@conformance,Fail spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail diff --git a/src/gallium/drivers/radeonsi/ci/gfx9-raven-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx9-raven-fail.csv index 749095e..5a40563 100644 --- a/src/gallium/drivers/radeonsi/ci/gfx9-raven-fail.csv +++ b/src/gallium/drivers/radeonsi/ci/gfx9-raven-fail.csv @@ -65,8 +65,6 @@ 
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourcei spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail -spec@arb_sparse_buffer@buffer-data,Fail -spec@arb_sparse_buffer@commit,Fail spec@egl_chromium_sync_control@conformance,Fail spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail spec@egl_ext_protected_content@conformance,Fail diff --git a/src/gallium/drivers/radeonsi/ci/gfx9-vega20-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx9-vega20-fail.csv index 95b9155..1fed121 100644 --- a/src/gallium/drivers/radeonsi/ci/gfx9-vega20-fail.csv +++ b/src/gallium/drivers/radeonsi/ci/gfx9-vega20-fail.csv @@ -82,8 +82,6 @@ spec@arb_shader_clock@execution@clock2x32,Fail spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail -spec@arb_sparse_buffer@buffer-data,Fail -spec@arb_sparse_buffer@commit,Fail spec@egl_ext_protected_content@conformance,Fail spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index c26b49d..87001a8 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -32,6 +32,15 @@ static inline unsigned cp_dma_max_byte_count(struct si_context *sctx) return max & ~(SI_CPDMA_ALIGNMENT - 1); } +/* should cp dma skip the hole in sparse bo */ +static inline bool cp_dma_sparse_wa(struct si_context *sctx, struct si_resource *sdst) +{ + if ((sctx->gfx_level == GFX9) && sdst && (sdst->flags & RADEON_FLAG_SPARSE)) + return 
true; + + return false; +} + /* Emit a CP DMA packet to do a copy from one buffer to another, or to clear * a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit * clear value. @@ -199,6 +208,17 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx)); unsigned dma_flags = CP_DMA_CLEAR | (sdst ? 0 : CP_DMA_DST_IS_GDS); + if (cp_dma_sparse_wa(sctx,sdst)) { + unsigned skip_count = + sctx->ws->buffer_find_next_committed_memory(sdst->buf, + va - sdst->gpu_address, &byte_count); + va += skip_count; + size -= skip_count; + } + + if (!byte_count) + continue; + si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, user_flags, coher, &is_first, &dma_flags); @@ -344,6 +364,27 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx)); unsigned dma_flags = gds_flags; + if (cp_dma_sparse_wa(sctx, si_resource(dst))) { + unsigned skip_count = + sctx->ws->buffer_find_next_committed_memory(si_resource(dst)->buf, + main_dst_offset - si_resource(dst)->gpu_address, &byte_count); + main_dst_offset += skip_count; + main_src_offset += skip_count; + size -= skip_count; + } + + if (cp_dma_sparse_wa(sctx, si_resource(src))) { + unsigned skip_count = + sctx->ws->buffer_find_next_committed_memory(si_resource(src)->buf, + main_src_offset - si_resource(src)->gpu_address, &byte_count); + main_dst_offset += skip_count; + main_src_offset += skip_count; + size -= skip_count; + } + + if (!byte_count) + continue; + si_cp_dma_prepare(sctx, dst, src, byte_count, size + skipped_size + realign_size, user_flags, coher, &is_first, &dma_flags); diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h index d86491a..d56c57b 100644 --- a/src/gallium/include/winsys/radeon_winsys.h +++ b/src/gallium/include/winsys/radeon_winsys.h @@ -448,6 +448,13 @@ struct 
radeon_winsys { uint64_t offset, uint64_t size, bool commit); /** + * Calc size of the first committed part of the given sparse buffer. + * \note Only implemented by the amdgpu winsys. + * \return the skipped count if the range_offset fall into a hole. + */ + unsigned (*buffer_find_next_committed_memory)(struct pb_buffer *buf, + uint64_t range_offset, unsigned *range_size); + /** * Return the virtual address of a buffer. * * When virtual memory is not in use, this is the offset relative to the diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 1e77431..a77d428 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1281,6 +1281,54 @@ out: return ok; } +static unsigned +amdgpu_bo_find_next_committed_memory(struct pb_buffer *buf, + uint64_t range_offset, unsigned *range_size) +{ + struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf); + struct amdgpu_sparse_commitment *comm; + uint32_t va_page, end_va_page; + uint32_t span_va_page, start_va_page; + unsigned skip, skip_after; + + skip = skip_after = 0; + comm = bo->u.sparse.commitments; + start_va_page = va_page = range_offset / RADEON_SPARSE_PAGE_SIZE; + end_va_page = va_page + DIV_ROUND_UP(*range_size, RADEON_SPARSE_PAGE_SIZE); + + simple_mtx_lock(&bo->lock); + /* Lookup the first page with backing physical storage */ + while (va_page < end_va_page && !comm[va_page].backing) + va_page++; + span_va_page = va_page; + + /* Lookup the first page without backing physical storage */ + while (va_page < end_va_page && comm[va_page].backing) + va_page++; + simple_mtx_unlock(&bo->lock); + + if (span_va_page * RADEON_SPARSE_PAGE_SIZE >= range_offset + *range_size) { + skip = *range_size; + *range_size = 0; + return skip; + } + + /* Calc byte count that need to skip before committed range */ + if (span_va_page != start_va_page) + skip = (span_va_page - start_va_page) * RADEON_SPARSE_PAGE_SIZE + - range_offset % 
RADEON_SPARSE_PAGE_SIZE; + + if (va_page != end_va_page) { + skip_after = (end_va_page - va_page - 1) * RADEON_SPARSE_PAGE_SIZE + + *range_size % RADEON_SPARSE_PAGE_SIZE; + if (!(*range_size % RADEON_SPARSE_PAGE_SIZE)) + skip_after += RADEON_SPARSE_PAGE_SIZE; + } + + *range_size = *range_size - skip_after - skip; + return skip; +} + static void amdgpu_buffer_get_metadata(struct radeon_winsys *rws, struct pb_buffer *_buf, struct radeon_bo_metadata *md, @@ -1755,6 +1803,7 @@ void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws) ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated; ws->base.buffer_get_handle = amdgpu_bo_get_handle; ws->base.buffer_commit = amdgpu_bo_sparse_commit; + ws->base.buffer_find_next_committed_memory = amdgpu_bo_find_next_committed_memory; ws->base.buffer_get_virtual_address = amdgpu_bo_get_va; ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain; ws->base.buffer_get_flags = amdgpu_bo_get_flags; -- 2.7.4