From 43fd5528728ec8c04267749debfa1e66f84316d6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sat, 25 Feb 2023 17:41:39 -0500
Subject: [PATCH] radeonsi: allow using 64K LDS for NGG to allow larger workgroups

This should help with NGG streamout performance, which is limited by
the workgroup size (it should be as large as possible).

Reviewed-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 6418605..6fe8b1d 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -106,10 +106,8 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
    const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
    const unsigned min_verts_per_prim = gs_stage == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
 
-   /* All these are in dwords: */
-   /* GE can only use 8K dwords (32KB) of LDS per workgroup.
-    */
-   const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
+   /* All these are in dwords. The maximum is 16K dwords (64KB) of LDS per workgroup. */
+   const unsigned max_lds_size = 16 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
    const unsigned target_lds_size = max_lds_size;
    unsigned esvert_lds_size = 0;
    unsigned gsprim_lds_size = 0;
-- 
2.7.4