From ce4f4a8d13c5b1b0816f6b3121131d7b63f746c2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 24 May 2021 17:23:56 -0400 Subject: [PATCH] radeonsi: set more precise max_waves in NGG code Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 8f08f27..5f3763b 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -477,7 +477,13 @@ static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout &ctx->ac, ctx->gs_ngg_scratch, LLVMConstInt(ctx->ac.i32, 12 + 8 * stream, false)); primemit_scan[stream].waveidx = get_wave_id_in_tg(ctx); primemit_scan[stream].numwaves = get_tgsize(ctx); - primemit_scan[stream].maxwaves = 8; + if (ctx->stage == MESA_SHADER_GEOMETRY) { + /* ngg_subgroup_size is only the input size. GS can always generate up to 256 vertices. */ + primemit_scan[stream].maxwaves = DIV_ROUND_UP(256, ctx->ac.wave_size); + } else { + primemit_scan[stream].maxwaves = DIV_ROUND_UP(ctx->screen->ngg_subgroup_size, + ctx->ac.wave_size); + } ac_build_wg_scan_top(&ctx->ac, &primemit_scan[stream]); } } @@ -813,9 +819,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out struct si_shader_selector *sel = shader->selector; struct si_shader_info *info = &sel->info; LLVMBuilderRef builder = ctx->ac.builder; - unsigned subgroup_size = ctx->screen->ngg_subgroup_size; - unsigned max_waves = ctx->ac.wave_size == 64 ? DIV_ROUND_UP(subgroup_size, 64) : - DIV_ROUND_UP(subgroup_size, 32); + unsigned max_waves = DIV_ROUND_UP(ctx->screen->ngg_subgroup_size, ctx->ac.wave_size); assert(shader->key.opt.ngg_culling); assert(shader->key.as_ngg); @@ -1864,7 +1868,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) vertlive_scan.scratch = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ctx->ac.i32_0); vertlive_scan.waveidx = get_wave_id_in_tg(ctx); vertlive_scan.numwaves = get_tgsize(ctx); - vertlive_scan.maxwaves = 8; + vertlive_scan.maxwaves = DIV_ROUND_UP(256, ctx->ac.wave_size); ac_build_wg_scan(&ctx->ac, &vertlive_scan); -- 2.7.4