From 0c5429cc73f6d1787914fcebb4cb95677c8ebb82 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 2 Aug 2018 20:33:06 -0400
Subject: [PATCH] radeonsi: add flag L2_STREAM for minimal cache usage

---
 src/amd/common/sid.h                     |  2 ++
 src/gallium/drivers/radeonsi/si_cp_dma.c | 16 ++++++++++------
 src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 0671f7d..d9c4a1a 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -294,11 +294,13 @@
 #define V_500_GDS 1 /* program SAS to 1 as well */
 #define V_500_DATA 2
 #define V_500_SRC_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_DST_CACHE_POLICY(x) (((unsigned)(x) & 0x3) << 25) /* CIK+ */
 #define S_500_DST_SEL(x) (((unsigned)(x) & 0x3) << 20)
 #define V_500_DST_ADDR 0
 #define V_500_GDS 1 /* program DAS to 1 as well */
 #define V_500_NOWHERE 2 /* new for GFX9 */
 #define V_500_DST_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_SRC_CACHE_POLICY(x) (((unsigned)(x) & 0x3) << 13) /* CIK+ */
 #define S_500_ENGINE(x) ((x) & 0x1)
 #define V_500_ME 0
 #define V_500_PFP 1
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index bae592a..61be22f 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -88,15 +88,19 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
 
 	/* Src and dst flags. */
 	if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) &&
-	    src_va == dst_va)
+	    src_va == dst_va) {
 		header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
-	else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
-		header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
+	} else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+		header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) |
+			  S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
+	}
 
-	if (flags & CP_DMA_CLEAR)
+	if (flags & CP_DMA_CLEAR) {
 		header |= S_411_SRC_SEL(V_411_DATA);
-	else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
-		header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
+	} else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+		header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
+			  S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
+	}
 
 	if (sctx->chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5fa8c33..95489f0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1111,6 +1111,7 @@ void si_init_clear_functions(struct si_context *sctx);
 enum si_cache_policy {
 	L2_BYPASS,
 	L2_LRU, /* same as SLC=0 */
+	L2_STREAM, /* same as SLC=1 */
 };
 
 enum si_coherency {
-- 
2.7.4