nir_variable *prim_exp_arg_var;
nir_variable *es_accepted_var;
nir_variable *gs_accepted_var;
+ nir_variable *gs_vtx_indices_vars[3];
struct u_vector saved_uniforms;
return arg;
}
-static nir_ssa_def *
-ngg_input_primitive_vertex_index(nir_builder *b, unsigned vertex)
+static void
+ngg_nogs_init_vertex_indices_vars(nir_builder *b, nir_function_impl *impl, lower_ngg_nogs_state *st)
{
- return nir_ubfe(b, nir_load_gs_vertex_offset_amd(b, .base = vertex / 2u),
- nir_imm_int(b, (vertex & 1u) * 16u), nir_imm_int(b, 16u));
+ for (unsigned v = 0; v < st->num_vertices_per_primitives; ++v) {
+ st->gs_vtx_indices_vars[v] = nir_local_variable_create(impl, glsl_uint_type(), "gs_vtx_addr");
+
+ nir_ssa_def *vtx = nir_ubfe(b, nir_load_gs_vertex_offset_amd(b, .base = v / 2u),
+ nir_imm_int(b, (v & 1u) * 16u), nir_imm_int(b, 16u));
+ nir_store_var(b, st->gs_vtx_indices_vars[v], vtx, 0x1);
+ }
}
static nir_ssa_def *
} else {
nir_ssa_def *vtx_idx[3] = {0};
- vtx_idx[0] = ngg_input_primitive_vertex_index(b, 0);
- vtx_idx[1] = st->num_vertices_per_primitives >= 2
- ? ngg_input_primitive_vertex_index(b, 1)
- : nir_imm_zero(b, 1, 32);
- vtx_idx[2] = st->num_vertices_per_primitives >= 3
- ? ngg_input_primitive_vertex_index(b, 2)
- : nir_imm_zero(b, 1, 32);
+ for (unsigned v = 0; v < st->num_vertices_per_primitives; ++v)
+ vtx_idx[v] = nir_load_var(b, st->gs_vtx_indices_vars[v]);
return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL, st->use_edgeflags);
}
arg = emit_ngg_nogs_prim_exp_arg(b, st);
if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) {
- /* Copy Primitive IDs from GS threads to the LDS address corresponding to the ES thread of the provoking vertex. */
- nir_ssa_def *prim_id = nir_load_primitive_id(b);
- nir_ssa_def *provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx);
- nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u);
-
- nir_store_shared(b, prim_id, addr);
+ nir_ssa_def *prim_valid = nir_ieq_imm(b, nir_ushr_imm(b, arg, 31), 0);
+ nir_if *if_prim_valid = nir_push_if(b, prim_valid);
+ {
+ /* Copy Primitive IDs from GS threads to the LDS address
+ * corresponding to the ES thread of the provoking vertex.
+ * It will be exported as a per-vertex attribute.
+ */
+ nir_ssa_def *prim_id = nir_load_primitive_id(b);
+ nir_ssa_def *provoking_vtx_idx = nir_load_var(b, st->gs_vtx_indices_vars[st->provoking_vtx_idx]);
+ nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u);
+
+ nir_store_shared(b, prim_id, addr);
+ }
+ nir_pop_if(b, if_prim_valid);
}
nir_export_primitive_amd(b, arg);
nir_ssa_def *vtx_addr = nir_load_var(b, gs_vtxaddr_vars[v]);
nir_ssa_def *exporter_vtx_idx = nir_load_shared(b, 1, 8, vtx_addr, .base = lds_es_exporter_tid);
exporter_vtx_indices[v] = nir_u2u32(b, exporter_vtx_idx);
+ nir_store_var(b, nogs_state->gs_vtx_indices_vars[v], exporter_vtx_indices[v], 0x1);
}
nir_ssa_def *prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, 3, exporter_vtx_indices, NULL, nogs_state->use_edgeflags);
/* Load vertex indices from input VGPRs */
nir_ssa_def *vtx_idx[3] = {0};
for (unsigned vertex = 0; vertex < 3; ++vertex)
- vtx_idx[vertex] = ngg_input_primitive_vertex_index(b, vertex);
+ vtx_idx[vertex] = nir_load_var(b, nogs_state->gs_vtx_indices_vars[vertex]);
nir_ssa_def *vtx_addr[3] = {0};
nir_ssa_def *pos[3][4] = {0};
nir_cf_extract(&extracted, nir_before_cf_list(&impl->body), nir_after_cf_list(&impl->body));
b->cursor = nir_before_cf_list(&impl->body);
+ ngg_nogs_init_vertex_indices_vars(b, impl, &state);
+
if (!can_cull) {
/* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */
nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0)));