From e93fe403bc0e85994d1be59ad3ad5bf65ecbf971 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 5 Jun 2018 01:20:23 -0400 Subject: [PATCH] radeonsi: properly compute an LS-HS thread group size limit MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit "64 / max * 4" can be less than "64 * 4 / max" because the integer division truncates before the multiplication (e.g. max = 3 gives 84 instead of 85). Tested-by: Dieter Nützel --- src/gallium/drivers/radeonsi/si_state_draw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 942cb3c..e7f8389 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -146,7 +146,8 @@ static bool si_emit_derived_tess_state(struct si_context *sctx, * resource usage. Also ensures that the number of tcs in and out * vertices per threadgroup are at most 256. */ - *num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4; + unsigned max_verts_per_patch = MAX2(num_tcs_input_cp, num_tcs_output_cp); + *num_patches = 256 / max_verts_per_patch; /* Make sure that the data fits in LDS. This assumes the shaders only * use LDS for the inputs and outputs. @@ -173,7 +174,7 @@ static bool si_emit_derived_tess_state(struct si_context *sctx, /* SI bug workaround, related to power management. Limit LS-HS * threadgroups to only one wave. */ - unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); + unsigned one_wave = 64 / max_verts_per_patch; *num_patches = MIN2(*num_patches, one_wave); } -- 2.7.4