From fc3d8e1125524eb8f7dfdefd6ca2a716c68c0e7d Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Tue, 4 Apr 2023 09:42:24 +0800 Subject: [PATCH] radeonsi: fix max scratch lds size calculation when ngg MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fixes: 028d0590f85 ("radeonsi: replace llvm ngg vs/tes with nir lowering") Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 2880f32..923dc79 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -107,7 +107,9 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader) const unsigned min_verts_per_prim = gs_stage == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1; /* All these are in dwords. The maximum is 16K dwords (64KB) of LDS per workgroup. */ - const unsigned max_lds_size = 16 * 1024 - gfx10_ngg_get_scratch_dw_size(shader); + const unsigned scratch_lds_size = gfx10_ngg_get_scratch_dw_size(shader); + /* Scratch is at the end of LDS space and 2-dword aligned, so it may cost more due to alignment. */ + const unsigned max_lds_size = 16 * 1024 - ALIGN(scratch_lds_size, 2); const unsigned target_lds_size = max_lds_size; unsigned esvert_lds_size = 0; unsigned gsprim_lds_size = 0; -- 2.7.4