From ffbf3a5f8b51cb2f2a98fc0eb64e5ff369e4b15c Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 14 Jan 2021 07:49:11 -0500
Subject: [PATCH] radeonsi: simplify the NGG culling condition in si_draw_vbo

Changes:
- disallow NGG culling for GS, fast launch for tess using template args
  (GS can't do NGG culling, tess can't do fast launch)
- skip checking current_rast_prim with tessellation
  (bake the condition into ngg_cull_vert_threshold)
- use only 1 vertex count threshold for enabling NGG shader culling
  to simplify it. I think it doesn't have a big impact. The threshold
  computation depends on more parameters than just fast launch.

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/gallium/drivers/radeonsi/si_shader.h        |  1 -
 src/gallium/drivers/radeonsi/si_state_draw.cpp  | 21 ++++++++++-----------
 src/gallium/drivers/radeonsi/si_state_shaders.c | 14 ++++----------
 3 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 0f8ace2..7c0874f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -449,7 +449,6 @@ struct si_shader_selector {
    ubyte num_vbos_in_user_sgprs;
    unsigned pa_cl_vs_out_cntl;
    unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
-   unsigned ngg_cull_nonindexed_fast_launch_vert_threshold; /* UINT32_MAX = disabled */
    ubyte clipdist_mask;
    ubyte culldist_mask;
    enum pipe_prim_type rast_prim;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index b4ef3ea..16a7421 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -1974,23 +1974,22 @@ static void si_draw_vbo(struct pipe_context *ctx,
    /* Update NGG culling settings. */
    uint8_t old_ngg_culling = sctx->ngg_culling;
    if (GFX_VERSION >= GFX10) {
-      struct si_shader_selector *hw_vs;
-      if (NGG && !dispatch_prim_discard_cs && sctx->current_rast_prim == PIPE_PRIM_TRIANGLES &&
-          (hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso) &&
-          (total_direct_count > hw_vs->ngg_cull_vert_threshold ||
-           (!index_size &&
-            total_direct_count > hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold &&
-            prim & ((1 << PIPE_PRIM_TRIANGLES) |
-                    (1 << PIPE_PRIM_TRIANGLE_STRIP))))) {
+      struct si_shader_selector *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso;
+
+      if (NGG && !HAS_GS && !dispatch_prim_discard_cs &&
+          /* Tessellation sets ngg_cull_vert_threshold to UINT_MAX if the prim type
+           * is not triangles, so this check is only needed without tessellation. */
+          (HAS_TESS || sctx->current_rast_prim == PIPE_PRIM_TRIANGLES) &&
+          total_direct_count > hw_vs->ngg_cull_vert_threshold) {
          uint8_t ngg_culling = sctx->viewport0_y_inverted ? rs->ngg_cull_flags_y_inverted :
                                                             rs->ngg_cull_flags;
 
          /* Use NGG fast launch for certain primitive types.
           * A draw must have at least 1 full primitive.
+          * The fast launch doesn't work with tessellation.
           */
-         if (ngg_culling &&
-             hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold < UINT32_MAX &&
-             min_direct_count >= 3 && !HAS_TESS && !HAS_GS) {
+         if (!HAS_TESS && ngg_culling && min_direct_count >= 3 &&
+             !(sctx->screen->debug_flags & DBG(NO_FAST_LAUNCH))) {
             if (prim == PIPE_PRIM_TRIANGLES && !index_size) {
                ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST;
             } else if (prim == PIPE_PRIM_TRIANGLE_STRIP) {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 50257c0..c087549 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2798,16 +2798,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
         !sel->info.base.vs.window_space_position));
 
    sel->ngg_cull_vert_threshold = UINT_MAX; /* disabled (changed below) */
-   sel->ngg_cull_nonindexed_fast_launch_vert_threshold = UINT_MAX;
 
    if (ngg_culling_allowed) {
       if (sel->info.stage == MESA_SHADER_VERTEX) {
-         /* 1000 non-indexed vertices (roughly 8 primgroups) are needed
-          * per draw call (no TES/GS) to enable NGG culling by default.
-          */
-         if (!(sscreen->debug_flags & DBG(NO_FAST_LAUNCH)))
-            sel->ngg_cull_nonindexed_fast_launch_vert_threshold = 1000;
-
          if (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL))
             sel->ngg_cull_vert_threshold = 0; /* always enabled */
          else if (sscreen->options.shader_culling ||
@@ -2816,9 +2809,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                    sscreen->info.is_pro_graphics))
             sel->ngg_cull_vert_threshold = 1500; /* vertex count must be more than this */
       } else if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
-         if (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
-             sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
-             sscreen->info.chip_class == GFX10_3)
+         if (sel->rast_prim == PIPE_PRIM_TRIANGLES &&
+             (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
+              sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
+              sscreen->info.chip_class == GFX10_3))
             sel->ngg_cull_vert_threshold = 0; /* always enabled */
       }
    }
-- 
2.7.4