From 1391564493ceb375e14c78fc1bba83f7ceb87724 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 3 Nov 2022 09:02:14 +0100 Subject: [PATCH] ac/nir,radv: rework and fix NGG queries enables for VS/TES XFB queries need to be enabled with NGG streamout and VS/TES. Previously, the NGG lowering code relied on has_prim_query for XFB. This fixes failures with RADV_PERFTEST=ngg_streamout on GFX10.3 with the vkd3d-proton testsuite. Vulkan CTS is missing TES tests with XFB queries apparently. Cc: 22.3 mesa-stable Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: (cherry picked from commit 505290dc44a5b728dbc6ec9ed038300ab158c323) --- .pick_status.json | 2 +- src/amd/common/ac_nir.h | 4 ++-- src/amd/common/ac_nir_lower_ngg.c | 16 ++++++++-------- src/amd/vulkan/radv_shader.c | 4 ++-- src/amd/vulkan/radv_shader.h | 3 +++ src/amd/vulkan/radv_shader_args.c | 3 ++- src/amd/vulkan/radv_shader_info.c | 13 +++++++++++++ 7 files changed, 31 insertions(+), 14 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 4646b58..953b81e 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -742,7 +742,7 @@ "description": "ac/nir,radv: rework and fix NGG queries enables for VS/TES", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index ee8d025..86aaf55 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -128,13 +128,14 @@ typedef struct { const uint8_t *vs_output_param_offset; /* GFX11+ */ bool can_cull; bool disable_streamout; + bool has_gen_prim_query; + bool has_xfb_prim_query; /* VS */ unsigned num_vertices_per_primitive; bool early_prim_export; bool passthrough; bool use_edgeflags; - bool has_prim_query; int primitive_id_location; uint32_t instance_rate_inputs; uint32_t clipdist_enable_mask; @@ -142,7 +143,6 @@ typedef struct { /* GS */ unsigned gs_out_vtx_bytes; - bool has_xfb_query; } ac_nir_lower_ngg_options; void diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 48da19d..7a3c474 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -506,7 +506,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def arg = nir_iand(b, arg, mask); } - if (st->options->has_prim_query) { + if (st->options->has_gen_prim_query) { nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b)); { /* Number of active GS threads. Each has 1 output primitive. */ @@ -1677,7 +1677,7 @@ ngg_nogs_store_all_outputs_to_lds(nir_shader *shader, lower_ngg_nogs_state *st) static void ngg_build_streamout_buffer_info(nir_builder *b, nir_xfb_info *info, - bool has_prim_query, + bool has_xfb_prim_query, nir_ssa_def *scratch_base, nir_ssa_def *tid_in_tg, nir_ssa_def *gen_prim[4], @@ -1769,7 +1769,7 @@ ngg_build_streamout_buffer_info(nir_builder *b, } /* Update shader query. */ - if (has_prim_query) { + if (has_xfb_prim_query) { nir_if *if_shader_query = nir_push_if(b, nir_load_prim_xfb_query_enabled_amd(b)); { for (unsigned stream = 0; stream < 4; stream++) { @@ -1859,7 +1859,7 @@ ngg_nogs_build_streamout(nir_builder *b, lower_ngg_nogs_state *s) nir_ssa_def *so_buffer[4] = {0}; nir_ssa_def *prim_stride[4] = {0}; nir_ssa_def *tid_in_tg = nir_load_local_invocation_index(b); - ngg_build_streamout_buffer_info(b, info, s->options->has_prim_query, + ngg_build_streamout_buffer_info(b, info, s->options->has_xfb_prim_query, lds_scratch_base, tid_in_tg, gen_prim_per_stream, prim_stride, so_buffer, buffer_offsets, @@ -2387,13 +2387,13 @@ ngg_gs_clear_primflags(nir_builder *b, nir_ssa_def *num_vertices, unsigned strea static void ngg_gs_shader_query(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg_gs_state *s) { - bool has_xfb_query = s->options->has_xfb_query; + bool has_gen_prim_query = s->options->has_gen_prim_query; bool has_pipeline_stats_query = s->options->gfx_level < GFX11; nir_ssa_def *pipeline_query_enabled = NULL; nir_ssa_def *prim_gen_query_enabled = NULL; nir_ssa_def *shader_query_enabled = NULL; - if (has_xfb_query) { + if (has_gen_prim_query) { prim_gen_query_enabled = nir_load_prim_gen_query_enabled_amd(b); if (has_pipeline_stats_query) { pipeline_query_enabled = nir_load_pipeline_stat_query_enabled_amd(b); @@ -2443,7 +2443,7 @@ ngg_gs_shader_query(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg_gs_st nir_pop_if(b, if_pipeline_query); } - if (has_xfb_query) { + if (has_gen_prim_query) { nir_if *if_prim_gen_query = nir_push_if(b, prim_gen_query_enabled); { /* Add to the counter for this stream. */ @@ -3036,7 +3036,7 @@ ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *st) nir_ssa_def *buffer_offsets[4] = {0}; nir_ssa_def *so_buffer[4] = {0}; nir_ssa_def *prim_stride[4] = {0}; - ngg_build_streamout_buffer_info(b, info, st->options->has_xfb_query, + ngg_build_streamout_buffer_info(b, info, st->options->has_xfb_prim_query, st->lds_addr_gs_scratch, tid_in_tg, gen_prim, prim_stride, so_buffer, buffer_offsets, emit_prim); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 94275b7..04e3714 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1442,6 +1442,8 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_ options.vs_output_param_offset = info->outinfo.vs_output_param_offset; options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling; options.disable_streamout = !device->physical_device->use_ngg_streamout; + options.has_gen_prim_query = info->has_ngg_prim_query; + options.has_xfb_prim_query = info->has_ngg_xfb_query; if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { @@ -1453,7 +1455,6 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_ options.num_vertices_per_primitive = num_vertices_per_prim; options.early_prim_export = info->has_ngg_early_prim_export; options.passthrough = info->is_ngg_passthrough; - options.has_prim_query = pl_key->primitives_generated_query; options.primitive_id_location = info->outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1; options.instance_rate_inputs = pl_key->vs.instance_rate_inputs; @@ -1465,7 +1466,6 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_ assert(info->is_ngg); options.gs_out_vtx_bytes = info->gs.gsvs_vertex_size; - options.has_xfb_query = true; NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options); } else if (nir->info.stage == MESA_SHADER_MESH) { diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index a0d75fd..bb8bf15 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -247,6 +247,8 @@ struct radv_shader_info { bool is_ngg_passthrough; bool has_ngg_culling; bool has_ngg_early_prim_export; + bool has_ngg_prim_query; + bool has_ngg_xfb_query; uint32_t num_lds_blocks_when_not_culling; uint32_t num_tess_patches; uint32_t esgs_itemsize; /* Only for VS or TES as ES */ @@ -284,6 +286,7 @@ struct radv_shader_info { unsigned invocations; unsigned es_type; /* GFX9: VS or TES */ uint8_t num_linked_inputs; + bool has_ngg_pipeline_stat_query; } gs; struct { uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 10b83fe..94cec1b 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -561,7 +561,8 @@ radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipelin { struct user_sgpr_info user_sgpr_info; bool needs_view_index = info->uses_view_index; - bool has_ngg_query = stage == MESA_SHADER_GEOMETRY || key->primitives_generated_query; + bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query || + (stage == MESA_SHADER_GEOMETRY && info->gs.has_ngg_pipeline_stat_query); if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) { /* Handle all NGG shaders as GS to simplify the code here. */ diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 72d4e58..979dd28 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -1199,6 +1199,18 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_pipeline_stage } static void +gfx10_get_ngg_query_info(const struct radv_device *device, struct radv_pipeline_stage *es_stage, + struct radv_pipeline_stage *gs_stage, + const struct radv_pipeline_key *pipeline_key) +{ + struct radv_shader_info *info = gs_stage ? &gs_stage->info : &es_stage->info; + + info->gs.has_ngg_pipeline_stat_query = !!gs_stage; + info->has_ngg_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info; + info->has_ngg_prim_query = pipeline_key->primitives_generated_query || info->has_ngg_xfb_query; +} + +static void radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_stage *es_stage, struct radv_pipeline_stage *fs_stage, const struct radv_pipeline_key *pipeline_key) @@ -1299,6 +1311,7 @@ radv_link_shaders_info(struct radv_device *device, consumer && consumer->stage == MESA_SHADER_GEOMETRY ? consumer : NULL; gfx10_get_ngg_info(device, producer, gs_stage); + gfx10_get_ngg_query_info(device, producer, gs_stage, pipeline_key); /* Determine other NGG settings like culling for VS or TES without GS. */ if (!gs_stage && consumer) { -- 2.7.4