From bc3c74e2a296b48c4473a37c863e9df3005355fb Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 16 Oct 2020 05:02:58 -0400
Subject: [PATCH] radeonsi: tweak LATE_ALLOC_GS numbers for faster NGG culling

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 866d458..393e0f7 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1168,15 +1168,18 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
       late_alloc_wave64 = 0;
    else if (num_cu_per_sh <= 6)
       late_alloc_wave64 = num_cu_per_sh - 2; /* All CUs enabled */
-   else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
-      late_alloc_wave64 = (num_cu_per_sh - 2) * 6;
+   else if (shader->key.opt.ngg_culling)
+      late_alloc_wave64 = num_cu_per_sh * 10;
    else
-      late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+      late_alloc_wave64 = num_cu_per_sh * 4;
 
    /* Limit LATE_ALLOC_GS for prevent a hang (hw bug). */
    if (sscreen->info.chip_class == GFX10)
       late_alloc_wave64 = MIN2(late_alloc_wave64, 64);
 
+   /* Max number that fits into the register field. */
+   late_alloc_wave64 = MIN2(late_alloc_wave64, 127);
+
    si_pm4_set_reg(
       pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
       S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
-- 
2.7.4