From 0004974467561e30b1251bf9daea54ca6766b8fd Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 13 Oct 2022 18:40:15 +0100 Subject: [PATCH] radeonsi: increase gfx1100/gfx1101 physical vgprs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit https://reviews.llvm.org/D134522 Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Reviewed-by: Marek Olšák Part-of: --- src/amd/common/ac_binary.c | 10 ---------- src/gallium/drivers/radeonsi/si_shader.c | 17 ++++++++++++++++- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c index a26e549..53e22e9 100644 --- a/src/amd/common/ac_binary.c +++ b/src/amd/common/ac_binary.c @@ -118,16 +118,6 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav if (!conf->spi_ps_input_addr) conf->spi_ps_input_addr = conf->spi_ps_input_ena; - /* GFX 10.3 internally: - * - aligns VGPRS to 16 for Wave32 and 8 for Wave64 - * - aligns LDS to 1024 - * - * For shader-db stats, set num_vgprs that the hw actually uses. - */ - if (info->gfx_level == GFX10_3) { - conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8); - } - /* Enable 64-bit and 16-bit denormals, because there is no performance * cost. * diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index de21a80..4797f0f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1058,10 +1058,25 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) } if (conf->num_vgprs) { + /* GFX 10.3 internally: + * - aligns VGPRS to 16 for Wave32 and 8 for Wave64 + * - aligns LDS to 1024 + * + * For shader-db stats, set num_vgprs that the hw actually uses. + */ + unsigned num_vgprs = conf->num_vgprs; + if (sscreen->info.family == CHIP_GFX1100 || sscreen->info.family == CHIP_GFX1101) { + num_vgprs = util_align_npot(num_vgprs, shader->wave_size == 32 ? 24 : 12); + } else if (sscreen->info.gfx_level == GFX10_3) { + num_vgprs = align(num_vgprs, shader->wave_size == 32 ? 16 : 8); + } else { + num_vgprs = align(num_vgprs, shader->wave_size == 32 ? 8 : 4); + } + /* Always print wave limits as Wave64, so that we can compare * Wave32 and Wave64 with shader-db fairly. */ unsigned max_vgprs = sscreen->info.num_physical_wave64_vgprs_per_simd; - max_simd_waves = MIN2(max_simd_waves, max_vgprs / conf->num_vgprs); + max_simd_waves = MIN2(max_simd_waves, max_vgprs / num_vgprs); } unsigned max_lds_per_simd = sscreen->info.lds_size_per_workgroup / 4; -- 2.7.4