From: Marek Olšák
Date: Fri, 16 Oct 2020 09:02:58 +0000 (-0400)
Subject: radeonsi: tweak LATE_ALLOC_GS numbers for faster NGG culling
X-Git-Tag: upstream/21.0.0~3591
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bc3c74e2a296b48c4473a37c863e9df3005355fb;p=platform%2Fupstream%2Fmesa.git

radeonsi: tweak LATE_ALLOC_GS numbers for faster NGG culling

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 866d458..393e0f7 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1168,15 +1168,18 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
       late_alloc_wave64 = 0;
    else if (num_cu_per_sh <= 6)
       late_alloc_wave64 = num_cu_per_sh - 2; /* All CUs enabled */
-   else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
-      late_alloc_wave64 = (num_cu_per_sh - 2) * 6;
+   else if (shader->key.opt.ngg_culling)
+      late_alloc_wave64 = num_cu_per_sh * 10;
    else
-      late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+      late_alloc_wave64 = num_cu_per_sh * 4;
 
    /* Limit LATE_ALLOC_GS for prevent a hang (hw bug). */
    if (sscreen->info.chip_class == GFX10)
       late_alloc_wave64 = MIN2(late_alloc_wave64, 64);
 
+   /* Max number that fits into the register field. */
+   late_alloc_wave64 = MIN2(late_alloc_wave64, 127);
+
    si_pm4_set_reg(
       pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
       S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));