From 79732416fd061b225ef3713a2bc76ea4dd0eea47 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 18 Feb 2023 03:54:27 -0500 Subject: [PATCH] amd: query cache sizes from the kernel Also rename l1_cache_size -> tcp_cache_size. L1 means shader array cache. Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_gpu_info.c | 113 ++++++++++++++++++++++++++----------------- src/amd/common/ac_gpu_info.h | 4 ++ src/amd/common/ac_rgp.c | 2 +- 3 files changed, 73 insertions(+), 46 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 59c110e..985647a 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -990,42 +990,64 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) info->tcc_rb_non_coherent = !util_is_power_of_two_or_zero(info->num_tcc_blocks); - switch (info->family) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_OLAND: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_TONGA: - case CHIP_STONEY: - case CHIP_RAVEN2: - info->l2_cache_size = info->num_tcc_blocks * 64 * 1024; - break; - case CHIP_VERDE: - case CHIP_HAINAN: - case CHIP_BONAIRE: - case CHIP_KAVERI: - case CHIP_ICELAND: - case CHIP_CARRIZO: - case CHIP_FIJI: - case CHIP_POLARIS12: - case CHIP_VEGAM: - case CHIP_GFX1036: - info->l2_cache_size = info->num_tcc_blocks * 128 * 1024; - break; - default: - info->l2_cache_size = info->num_tcc_blocks * 256 * 1024; - break; - case CHIP_REMBRANDT: - case CHIP_GFX1103_R1: - info->l2_cache_size = info->num_tcc_blocks * 512 * 1024; - break; + if (info->drm_minor >= 52) { + info->sqc_inst_cache_size = device_info.sqc_inst_cache_size; + info->sqc_scalar_cache_size = device_info.sqc_data_cache_size; + info->num_sqc_per_wgp = device_info.num_sqc_per_wgp; } - if (info->gfx_level >= GFX11) - info->l1_cache_size = 32768; - else - info->l1_cache_size = 16384; + if (info->gfx_level >= GFX11 && info->drm_minor >= 52) { + info->tcp_cache_size = device_info.tcp_cache_size; + info->l1_cache_size = device_info.gl1c_cache_size; + info->l2_cache_size = device_info.gl2c_cache_size; + info->l3_cache_size_mb = DIV_ROUND_UP(device_info.mall_size, 1024 * 1024); + } else { + if (info->gfx_level >= GFX11) { + info->tcp_cache_size = 32768; + info->l1_cache_size = 256 * 1024; + } else { + info->tcp_cache_size = 16384; + info->l1_cache_size = 128 * 1024; + } + + if (info->gfx_level >= GFX10_3 && info->has_dedicated_vram) { + info->l3_cache_size_mb = info->num_tcc_blocks * + (info->family == CHIP_NAVI21 || + info->family == CHIP_NAVI22 ? 8 : 4); + } + + switch (info->family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_OLAND: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_TONGA: + case CHIP_STONEY: + case CHIP_RAVEN2: + info->l2_cache_size = info->num_tcc_blocks * 64 * 1024; + break; + case CHIP_VERDE: + case CHIP_HAINAN: + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_ICELAND: + case CHIP_CARRIZO: + case CHIP_FIJI: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_GFX1036: + info->l2_cache_size = info->num_tcc_blocks * 128 * 1024; + break; + default: + info->l2_cache_size = info->num_tcc_blocks * 256 * 1024; + break; + case CHIP_REMBRANDT: + case CHIP_GFX1103_R1: + info->l2_cache_size = info->num_tcc_blocks * 512 * 1024; + break; + } + } info->mc_arb_ramcfg = amdinfo.mc_arb_ramcfg; info->gb_addr_config = amdinfo.gb_addr_cfg; @@ -1412,12 +1434,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) } } - if (info->gfx_level >= GFX10_3 && info->has_dedicated_vram) { - info->l3_cache_size_mb = info->num_tcc_blocks * - (info->family == CHIP_NAVI21 || - info->family == CHIP_NAVI22 ? 8 : 4); - } - if (info->gfx_level >= GFX11) { switch (info->family) { case CHIP_GFX1103_R1: @@ -1517,13 +1533,20 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) fprintf(f, " max_gpu_freq = %i MHz\n", info->max_gpu_freq_mhz); fprintf(f, " max_gflops = %u GFLOPS\n", info->max_gflops); - if (info->gfx_level >= GFX10) { - fprintf(f, " l0_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024)); - fprintf(f, " l1_cache_size = %i KB\n", info->gfx_level >= GFX11 ? 256 : 128); - } else { - fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024)); + if (info->sqc_inst_cache_size) { + fprintf(f, " sqc_inst_cache_size = %i KB (%u per WGP)\n", + DIV_ROUND_UP(info->sqc_inst_cache_size, 1024), info->num_sqc_per_wgp); + } + if (info->sqc_scalar_cache_size) { + fprintf(f, " sqc_scalar_cache_size = %i KB (%u per WGP)\n", + DIV_ROUND_UP(info->sqc_scalar_cache_size, 1024), info->num_sqc_per_wgp); } + fprintf(f, " tcp_cache_size = %i KB\n", DIV_ROUND_UP(info->tcp_cache_size, 1024)); + + if (info->gfx_level >= GFX10) + fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024)); + fprintf(f, " l2_cache_size = %i KB\n", DIV_ROUND_UP(info->l2_cache_size, 1024)); if (info->l3_cache_size_mb) diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index c5b1179..d0e4d22 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -60,6 +60,10 @@ struct radeon_info { uint32_t num_cu; /* only enabled CUs */ uint32_t max_gpu_freq_mhz; /* also known as the shader clock */ uint32_t max_gflops; + uint32_t sqc_inst_cache_size; + uint32_t sqc_scalar_cache_size; + uint32_t num_sqc_per_wgp; + uint32_t tcp_cache_size; uint32_t l1_cache_size; uint32_t l2_cache_size; uint32_t l3_cache_size_mb; diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c index 41441d4..fa9ecb8 100644 --- a/src/amd/common/ac_rgp.c +++ b/src/amd/common/ac_rgp.c @@ -462,7 +462,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info, chunk->vram_bus_width = rad_info->memory_bus_width; chunk->vram_size = (uint64_t)rad_info->vram_size_kb * 1024; chunk->l2_cache_size = rad_info->l2_cache_size; - chunk->l1_cache_size = rad_info->l1_cache_size; + chunk->l1_cache_size = rad_info->tcp_cache_size; chunk->lds_size = rad_info->lds_size_per_workgroup; if (rad_info->gfx_level >= GFX10) { /* RGP expects the LDS size in CU mode. */ -- 2.7.4