radeonsi: tweak triangle list culling performance for GS fast launch
author Marek Olšák <marek.olsak@amd.com>
Wed, 21 Oct 2020 16:34:51 +0000 (12:34 -0400)
committer Marge Bot <eric+marge@anholt.net>
Wed, 18 Nov 2020 06:19:59 +0000 (06:19 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7542>

src/gallium/drivers/radeonsi/gfx10_shader_ngg.c

index 9ccde8f..1cad171 100644 (file)
@@ -1941,7 +1941,10 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
    unsigned max_esverts_base = 128;
 
    if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST) {
-      max_gsprims_base = 128 / 3;
+      /* Exactly 1 wave32 executes culling in primitive threads (there is no
+       * divergence), other waves are idle.
+       */
+      max_gsprims_base = 32;
       max_esverts_base = max_gsprims_base * 3;
    } else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) {
       max_gsprims_base = 126;