"description": "ac/nir,radv: rework and fix NGG queries enables for VS/TES",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": null
},
const uint8_t *vs_output_param_offset; /* GFX11+ */
bool can_cull;
bool disable_streamout;
+ bool has_gen_prim_query;
+ bool has_xfb_prim_query;
/* VS */
unsigned num_vertices_per_primitive;
bool early_prim_export;
bool passthrough;
bool use_edgeflags;
- bool has_prim_query;
int primitive_id_location;
uint32_t instance_rate_inputs;
uint32_t clipdist_enable_mask;
/* GS */
unsigned gs_out_vtx_bytes;
- bool has_xfb_query;
} ac_nir_lower_ngg_options;
void
arg = nir_iand(b, arg, mask);
}
- if (st->options->has_prim_query) {
+ if (st->options->has_gen_prim_query) {
nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b));
{
/* Number of active GS threads. Each has 1 output primitive. */
static void
ngg_build_streamout_buffer_info(nir_builder *b,
nir_xfb_info *info,
- bool has_prim_query,
+ bool has_xfb_prim_query,
nir_ssa_def *scratch_base,
nir_ssa_def *tid_in_tg,
nir_ssa_def *gen_prim[4],
}
/* Update shader query. */
- if (has_prim_query) {
+ if (has_xfb_prim_query) {
nir_if *if_shader_query = nir_push_if(b, nir_load_prim_xfb_query_enabled_amd(b));
{
for (unsigned stream = 0; stream < 4; stream++) {
nir_ssa_def *so_buffer[4] = {0};
nir_ssa_def *prim_stride[4] = {0};
nir_ssa_def *tid_in_tg = nir_load_local_invocation_index(b);
- ngg_build_streamout_buffer_info(b, info, s->options->has_prim_query,
+ ngg_build_streamout_buffer_info(b, info, s->options->has_xfb_prim_query,
lds_scratch_base, tid_in_tg,
gen_prim_per_stream, prim_stride,
so_buffer, buffer_offsets,
static void
ngg_gs_shader_query(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg_gs_state *s)
{
- bool has_xfb_query = s->options->has_xfb_query;
+ bool has_gen_prim_query = s->options->has_gen_prim_query;
bool has_pipeline_stats_query = s->options->gfx_level < GFX11;
nir_ssa_def *pipeline_query_enabled = NULL;
nir_ssa_def *prim_gen_query_enabled = NULL;
nir_ssa_def *shader_query_enabled = NULL;
- if (has_xfb_query) {
+ if (has_gen_prim_query) {
prim_gen_query_enabled = nir_load_prim_gen_query_enabled_amd(b);
if (has_pipeline_stats_query) {
pipeline_query_enabled = nir_load_pipeline_stat_query_enabled_amd(b);
nir_pop_if(b, if_pipeline_query);
}
- if (has_xfb_query) {
+ if (has_gen_prim_query) {
nir_if *if_prim_gen_query = nir_push_if(b, prim_gen_query_enabled);
{
/* Add to the counter for this stream. */
nir_ssa_def *buffer_offsets[4] = {0};
nir_ssa_def *so_buffer[4] = {0};
nir_ssa_def *prim_stride[4] = {0};
- ngg_build_streamout_buffer_info(b, info, st->options->has_xfb_query,
+ ngg_build_streamout_buffer_info(b, info, st->options->has_xfb_prim_query,
st->lds_addr_gs_scratch, tid_in_tg, gen_prim,
prim_stride, so_buffer, buffer_offsets, emit_prim);
options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
options.disable_streamout = !device->physical_device->use_ngg_streamout;
+ options.has_gen_prim_query = info->has_ngg_prim_query;
+ options.has_xfb_prim_query = info->has_ngg_xfb_query;
if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_TESS_EVAL) {
options.num_vertices_per_primitive = num_vertices_per_prim;
options.early_prim_export = info->has_ngg_early_prim_export;
options.passthrough = info->is_ngg_passthrough;
- options.has_prim_query = pl_key->primitives_generated_query;
options.primitive_id_location = info->outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1;
options.instance_rate_inputs = pl_key->vs.instance_rate_inputs;
assert(info->is_ngg);
options.gs_out_vtx_bytes = info->gs.gsvs_vertex_size;
- options.has_xfb_query = true;
NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options);
} else if (nir->info.stage == MESA_SHADER_MESH) {
bool is_ngg_passthrough;
bool has_ngg_culling;
bool has_ngg_early_prim_export;
+ bool has_ngg_prim_query;
+ bool has_ngg_xfb_query;
uint32_t num_lds_blocks_when_not_culling;
uint32_t num_tess_patches;
uint32_t esgs_itemsize; /* Only for VS or TES as ES */
unsigned invocations;
unsigned es_type; /* GFX9: VS or TES */
uint8_t num_linked_inputs;
+ bool has_ngg_pipeline_stat_query;
} gs;
struct {
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
{
struct user_sgpr_info user_sgpr_info;
bool needs_view_index = info->uses_view_index;
- bool has_ngg_query = stage == MESA_SHADER_GEOMETRY || key->primitives_generated_query;
+ bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query ||
+ (stage == MESA_SHADER_GEOMETRY && info->gs.has_ngg_pipeline_stat_query);
if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
/* Handle all NGG shaders as GS to simplify the code here. */
}
static void
+gfx10_get_ngg_query_info(const struct radv_device *device, struct radv_pipeline_stage *es_stage,
+ struct radv_pipeline_stage *gs_stage,
+ const struct radv_pipeline_key *pipeline_key)
+{ /* Fills in the NGG query flags of the shader info; 'device' is not read in this body. */
+ struct radv_shader_info *info = gs_stage ? &gs_stage->info : &es_stage->info; /* Flags are written to the GS stage info when gs_stage is non-NULL, otherwise to the ES stage info. */
+
+ info->gs.has_ngg_pipeline_stat_query = !!gs_stage; /* True exactly when a GS stage was passed in. */
+ info->has_ngg_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info; /* True when the selected stage's NIR has a non-NULL xfb_info. */
+ info->has_ngg_prim_query = pipeline_key->primitives_generated_query || info->has_ngg_xfb_query; /* Also forced on whenever the XFB query flag above is set. */
+}
+
+static void
radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_stage *es_stage,
struct radv_pipeline_stage *fs_stage,
const struct radv_pipeline_key *pipeline_key)
consumer && consumer->stage == MESA_SHADER_GEOMETRY ? consumer : NULL;
gfx10_get_ngg_info(device, producer, gs_stage);
+ gfx10_get_ngg_query_info(device, producer, gs_stage, pipeline_key);
/* Determine other NGG settings like culling for VS or TES without GS. */
if (!gs_stage && consumer) {