From: Marek Olšák
Date: Sat, 6 Jul 2019 02:12:36 +0000 (-0400)
Subject: radeonsi/gfx10: fix unnecessary LDS overallocation for NGG GS
X-Git-Tag: upstream/19.3.0~3965
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=37db9d28650c21d2091a654b7c6a636927ef584d;p=platform%2Fupstream%2Fmesa.git

radeonsi/gfx10: fix unnecessary LDS overallocation for NGG GS

Acked-by: Pierre-Eric Pelloux-Prayer
Reviewed-by: Samuel Pitoiset
---

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index e69bc81..de02997 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1279,17 +1279,11 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
 	/* We can't allow using the whole LDS, because GS waves compete with
 	 * other shader stages for LDS space.
 	 *
-	 * Streamout can increase the ESGS buffer size later on, so be more
-	 * conservative with streamout and use 4K dwords. This may be suboptimal.
-	 *
-	 * Otherwise, use the limit of 7K dwords. The reason is that we need
-	 * to leave some headroom for the max_esverts increase at the end.
-	 *
 	 * TODO: We should really take the shader's internal LDS use into
 	 *       account. The linker will fail if the size is greater than
 	 *       8K dwords.
 	 */
-	const unsigned max_lds_size = (gs_sel->so.num_outputs ? 4 : 7) * 1024 - 128;
+	const unsigned max_lds_size = 8 * 1024 - 768;
 	const unsigned target_lds_size = max_lds_size;
 	unsigned esvert_lds_size = 0;
 	unsigned gsprim_lds_size = 0;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f217abd..8f392d6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5242,7 +5242,7 @@ static bool si_shader_binary_open(struct si_screen *screen,
 	}
 
 	if (sel && shader->key.as_ngg) {
-		if (sel->so.num_outputs) {
+		if (sel->type != PIPE_SHADER_GEOMETRY && sel->so.num_outputs) {
 			unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
 			esgs_ring_size = MAX2(esgs_ring_size,
 					      shader->ngg.max_out_verts * esgs_vertex_bytes);