ac,radeonsi: limit Smart Access Memory to Zen 3 and GFX10.3 due to perf issues
author: Marek Olšák <marek.olsak@amd.com>
Thu, 24 Dec 2020 11:14:11 +0000 (06:14 -0500)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 5 Jan 2021 02:43:55 +0000 (02:43 +0000)
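Many people experience performance degradation with Smart Access Memory (SAM)
on some systems, so enable it by default only when all VRAM is CPU-visible and
the system pairs a Zen 3 CPU with a GFX10.3 GPU.
There will be a driconf option to enable SAM on other chips, as well as to
disable it on systems where it is enabled by default.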

Fixes: d3d6d381450 - ac: add radeon_info::all_vram_visible for Smart Access Memory
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3982

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8225>

src/amd/common/ac_gpu_info.c
src/amd/common/ac_gpu_info.h
src/gallium/drivers/radeonsi/si_buffer.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_texture.c
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c

index 1ea04ab..22eebe4 100644 (file)
@@ -29,6 +29,7 @@
 #include "drm-uapi/amdgpu_drm.h"
 #include "sid.h"
 #include "util/macros.h"
+#include "util/u_cpu_detect.h"
 #include "util/u_math.h"
 
 #include <stdio.h>
@@ -506,6 +507,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
    /* Add some margin of error, though this shouldn't be needed in theory. */
    info->all_vram_visible = info->vram_size * 0.9 < info->vram_vis_size;
 
+   util_cpu_detect();
+   info->smart_access_memory = info->all_vram_visible &&
+                               info->chip_class >= GFX10_3 &&
+                               util_cpu_caps.family >= CPU_AMD_ZEN3 &&
+                               util_cpu_caps.family < CPU_AMD_LAST;
+
    /* Set chip identification. */
    info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
    info->pci_rev_id = amdinfo->pci_rev_id;
@@ -1048,6 +1055,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
    fprintf(f, "    address32_hi = %u\n", info->address32_hi);
    fprintf(f, "    has_dedicated_vram = %u\n", info->has_dedicated_vram);
    fprintf(f, "    all_vram_visible = %u\n", info->all_vram_visible);
+   fprintf(f, "    smart_access_memory = %u\n", info->smart_access_memory);
    fprintf(f, "    num_sdp_interfaces = %u\n", info->num_sdp_interfaces);
    fprintf(f, "    num_tcc_blocks = %i\n", info->num_tcc_blocks);
    fprintf(f, "    tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
index 0904f12..ed11d73 100644 (file)
@@ -99,6 +99,7 @@ struct radeon_info {
    uint32_t address32_hi;
    bool has_dedicated_vram;
    bool all_vram_visible;
+   bool smart_access_memory;
    bool has_l2_uncached;
    bool r600_has_virtual_memory;
    uint32_t num_sdp_interfaces;
index 14c5069..7f769f4 100644 (file)
@@ -56,7 +56,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
    switch (res->b.b.usage) {
    case PIPE_USAGE_STREAM:
       res->flags |= RADEON_FLAG_GTT_WC;
-      if (sscreen->info.all_vram_visible)
+      if (sscreen->info.smart_access_memory)
          res->domains = RADEON_DOMAIN_VRAM;
       else
          res->domains = RADEON_DOMAIN_GTT;
@@ -153,7 +153,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
    if (res->domains & RADEON_DOMAIN_VRAM) {
       res->vram_usage = size;
 
-      if (!sscreen->info.all_vram_visible) {
+      if (!sscreen->info.smart_access_memory) {
          /* We don't want to evict buffers from VRAM by mapping them for CPU access,
           * because they might never be moved back again. If a buffer is large enough,
           * upload data by copying from a temporary GTT buffer. 8K might not seem much,
index 61508c2..eb4a94c 100644 (file)
@@ -493,15 +493,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
       goto fail;
 
    /* Initialize public allocators. */
-   bool all_vram_visible = sscreen->info.all_vram_visible;
+   bool smart_access_memory = sscreen->info.smart_access_memory;
    sctx->b.stream_uploader =
       u_upload_create(&sctx->b, 1024 * 1024, 0,
-                      all_vram_visible ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STREAM,
+                      smart_access_memory ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STREAM,
                       SI_RESOURCE_FLAG_32BIT); /* same flags as const_uploader */
    if (!sctx->b.stream_uploader)
       goto fail;
 
-   if (all_vram_visible) {
+   if (smart_access_memory) {
       sctx->b.const_uploader = sctx->b.stream_uploader;
    } else {
       sctx->b.const_uploader =
index e89f188..13d197b 100644 (file)
@@ -1810,7 +1810,8 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
        * is busy.
        */
       if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
-          (tex->buffer.domains & RADEON_DOMAIN_VRAM && !sctx->screen->info.all_vram_visible))
+          (tex->buffer.domains & RADEON_DOMAIN_VRAM &&
+           !sctx->screen->info.smart_access_memory))
          use_staging_texture = true;
       else if (usage & PIPE_MAP_READ)
          use_staging_texture =
index 497cb39..c22947b 100644 (file)
@@ -711,7 +711,7 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws,
    if (cs->ring_type == RING_GFX ||
        cs->ring_type == RING_COMPUTE ||
        cs->ring_type == RING_DMA) {
-      domain = ws->info.all_vram_visible ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
+      domain = ws->info.smart_access_memory ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
       flags |= RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC;
    } else {
       /* UVD/VCE */