gs_sel->screen->info.gfx_level >= GFX11 ? 3 : /* gfx11 requires at least 1 primitive per TG */
gs_sel->screen->info.gfx_level >= GFX10_3 ? 29 : (24 - 1 + max_verts_per_prim);
bool max_vert_out_per_gs_instance = false;
- unsigned max_gsprims_base = gs_sel->screen->ngg_subgroup_size; /* default prim group size clamp */
- unsigned max_esverts_base = gs_sel->screen->ngg_subgroup_size;
+ unsigned max_gsprims_base, max_esverts_base;
+
+ max_gsprims_base = max_esverts_base = si_get_max_workgroup_size(shader);
if (gs_stage == MESA_SHADER_GEOMETRY) {
bool force_multi_cycling = false;
}
}
- sscreen->ngg_subgroup_size = 128;
-
if (sscreen->info.gfx_level >= GFX11) {
unsigned attr_ring_size = sscreen->info.attribute_ring_size_per_se * sscreen->info.max_se;
sscreen->attribute_ring = si_aligned_buffer_create(&sscreen->b,
* We want to minimize the impact on multithreaded Mesa. */
struct ac_llvm_compiler compiler_lowp[10];
- unsigned ngg_subgroup_size;
-
struct util_idalloc_mt buffer_ids;
struct util_vertex_state_cache vertex_state_cache;
switch (shader->selector->stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_TESS_EVAL:
- return shader->key.ge.as_ngg ? shader->selector->screen->ngg_subgroup_size : 0;
+ /* Use the largest workgroup size for streamout */
+ if (shader->key.ge.as_ngg)
+ return si_shader_uses_streamout(shader) ? 256 : 128;
+ else
+ return 0;
case MESA_SHADER_TESS_CTRL:
/* Return this so that LLVM doesn't remove s_barrier
return shader->selector->screen->info.gfx_level >= GFX7 ? 128 : 0;
case MESA_SHADER_GEOMETRY:
- /* ngg_subgroup_size is only the input size. GS can always generate up to 256 vertices. */
+ /* GS can always generate up to 256 vertices. */
return shader->selector->screen->info.gfx_level >= GFX9 ? 256 : 0;
case MESA_SHADER_COMPUTE:
shader->selector->info.writes_edgeflag;
}
-static inline bool si_shader_uses_streamout(struct si_shader *shader)
+static inline bool si_shader_uses_streamout(const struct si_shader *shader)
{
return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
shader->selector->info.enabled_streamout_buffer_mask &&