From 6a504ab4733a97f2f1d5790c36ab380c962af5b3 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 25 Jul 2019 15:38:51 +0200 Subject: [PATCH] radv/gfx10: use L2 for DMA copy/fill operations It's coherent and faster. GFX7-GFX9 should also support this, but for now L2 is only used on GFX10 because this is untested on previous generations. This fixes dEQP-VK.memory.pipeline_barrier.transfer_* This also fixes some missing geometry in Dawn Of War III because VBOs weren't updated correctly. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/si_cmd_buffer.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 21a90cb..94f7591 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -1501,6 +1501,14 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, unsigned dma_flags = 0; unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer)); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + /* DMA operations via L2 are coherent and faster. + * TODO: GFX7-GFX9 should also support this but it + * requires tests/benchmarks. + */ + dma_flags |= CP_DMA_USE_L2; + } + si_cp_dma_prepare(cmd_buffer, byte_count, size + skipped_size + realign_size, &dma_flags); @@ -1545,6 +1553,14 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer)); unsigned dma_flags = CP_DMA_CLEAR; + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + /* DMA operations via L2 are coherent and faster. + * TODO: GFX7-GFX9 should also support this but it + * requires tests/benchmarks. + */ + dma_flags |= CP_DMA_USE_L2; + } + si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags); /* Emit the clear packet. */ -- 2.7.4