From: Rhys Perry Date: Mon, 26 Sep 2022 16:18:48 +0000 (+0100) Subject: aco/gfx11: increase gfx1100/gfx1101 physical vgprs X-Git-Tag: upstream/22.3.5~561 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=50073d6135044b8a8ccc7ddaf35588594edeb3d1;p=platform%2Fupstream%2Fmesa.git aco/gfx11: increase gfx1100/gfx1101 physical vgprs https://reviews.llvm.org/D134522 Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Reviewed-by: Marek Olšák Part-of: --- diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 300e320..83a8537 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -12563,7 +12563,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade program->config->float_mode = program->blocks[0].fp_mode.val; /* addition on GFX6-8 requires a carry-out (we use VCC) */ program->needs_vcc = program->gfx_level <= GFX8; - program->config->num_vgprs = get_vgpr_alloc(program, num_vgprs); + program->config->num_vgprs = std::min<uint16_t>(get_vgpr_alloc(program, num_vgprs), 256); program->config->num_sgprs = get_sgpr_alloc(program, num_sgprs); } diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 14de439..84493d1 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -111,14 +111,20 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, if (gfx_level >= GFX10) { program->dev.physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */ - program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512; program->dev.sgpr_alloc_granule = 128; program->dev.sgpr_limit = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */ - if (gfx_level == GFX10_3) - program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8; - else - program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 
8 : 4; + + if (family == CHIP_GFX1100 || family == CHIP_GFX1101) { + program->dev.physical_vgprs = program->wave_size == 32 ? 1536 : 768; + program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 24 : 12; + } else { + program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512; + if (gfx_level >= GFX10_3) + program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8; + else + program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4; + } } else if (program->gfx_level >= GFX8) { program->dev.physical_sgprs = 800; program->dev.sgpr_alloc_granule = 16; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index b2c9db4..9a10c67 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2134,7 +2134,7 @@ struct DeviceInfo { uint16_t vgpr_limit; uint16_t sgpr_limit; uint16_t sgpr_alloc_granule; - uint16_t vgpr_alloc_granule; /* must be power of two */ + uint16_t vgpr_alloc_granule; unsigned max_wave64_per_simd; unsigned simd_per_cu; bool has_fast_fma32 = false; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 80bbff4..0ecc957 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -348,7 +348,7 @@ get_vgpr_alloc(Program* program, uint16_t addressable_vgprs) { assert(addressable_vgprs <= program->dev.vgpr_limit); uint16_t granule = program->dev.vgpr_alloc_granule; - return align(std::max(addressable_vgprs, granule), granule); + return ALIGN_NPOT(std::max(addressable_vgprs, granule), granule); } unsigned @@ -370,7 +370,8 @@ get_addr_sgpr_from_waves(Program* program, uint16_t waves) uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t waves) { - uint16_t vgprs = program->dev.physical_vgprs / waves & ~(program->dev.vgpr_alloc_granule - 1); + uint16_t vgprs = program->dev.physical_vgprs / waves; + vgprs = vgprs / program->dev.vgpr_alloc_granule * program->dev.vgpr_alloc_granule; vgprs -= 
program->config->num_shared_vgprs / 2; return std::min(vgprs, program->dev.vgpr_limit); } diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 0a3401e..eef4ff0 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -3149,7 +3149,8 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra } /* end for BB */ /* num_gpr = rnd_up(max_used_gpr + 1) */ - program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1); + program->config->num_vgprs = + std::min<uint16_t>(get_vgpr_alloc(program, ctx.max_used_vgpr + 1), 256); program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1); program->progress = CompilationProgress::after_ra;