From a7f2faea46c719ad78550ad3ccaf6e60f88af244 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Fri, 17 Sep 2021 21:36:21 +0200 Subject: [PATCH] ac/nir: Emit edge flag instructions conditionally. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit They are not needed by RADV but will be needed by RadeonSI. Fossil DB results on Sienna Cichlid (with NGGC on): Totals from 56917 (44.24% of 128647) affected shaders: VGPRs: 1982664 -> 1975936 (-0.34%); split: -0.43%, +0.09% CodeSize: 152790880 -> 149510316 (-2.15%); split: -2.15%, +0.00% MaxWaves: 1617984 -> 1621900 (+0.24%) Instrs: 29272825 -> 28907038 (-1.25%); split: -1.26%, +0.01% Latency: 128744182 -> 127565678 (-0.92%); split: -1.14%, +0.22% InvThroughput: 20125915 -> 19805168 (-1.59%); split: -1.63%, +0.03% VClause: 521312 -> 519804 (-0.29%); split: -0.77%, +0.48% SClause: 688861 -> 688897 (+0.01%); split: -0.04%, +0.05% Copies: 3205421 -> 3177799 (-0.86%); split: -1.68%, +0.82% Branches: 1181457 -> 1183147 (+0.14%); split: -0.03%, +0.17% PreVGPRs: 1626681 -> 1595406 (-1.92%) Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_nir.h | 1 + src/amd/common/ac_nir_lower_ngg.c | 14 +++++++++----- src/amd/vulkan/radv_shader.c | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index f6d5796..f77104e 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -111,6 +111,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, bool consider_passthrough, bool export_prim_id, bool provoking_vtx_last, + bool use_edgeflags, uint32_t instance_rate_inputs); void diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index a3d9416..84ebbfc 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -51,6 +51,7 @@ typedef struct bool passthrough; bool export_prim_id; bool early_prim_export; + bool use_edgeflags; unsigned wave_size; unsigned max_num_waves; unsigned num_vertices_per_primitives; @@ -294,9 +295,10 @@ pervertex_lds_addr(nir_builder *b, nir_ssa_def *vertex_idx, unsigned per_vtx_byt static nir_ssa_def * emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives, - nir_ssa_def *vertex_indices[3], nir_ssa_def *is_null_prim) + nir_ssa_def *vertex_indices[3], nir_ssa_def *is_null_prim, + bool use_edgeflags) { - nir_ssa_def *arg = b->shader->info.stage == MESA_SHADER_VERTEX + nir_ssa_def *arg = use_edgeflags ? nir_build_load_initial_edgeflags_amd(b) : nir_imm_int(b, 0); @@ -339,7 +341,7 @@ emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st) ? ngg_input_primitive_vertex_index(b, 2) : nir_imm_zero(b, 1, 32); - return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL); + return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL, st->use_edgeflags); } } @@ -741,7 +743,7 @@ compact_vertices_after_culling(nir_builder *b, exporter_vtx_indices[v] = nir_u2u32(b, exporter_vtx_idx); } - nir_ssa_def *prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, 3, exporter_vtx_indices, NULL); + nir_ssa_def *prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, 3, exporter_vtx_indices, NULL, nogs_state->use_edgeflags); nir_store_var(b, prim_exp_arg_var, prim_exp_arg, 0x1u); } nir_pop_if(b, if_gs_accepted); @@ -1256,6 +1258,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, bool consider_passthrough, bool export_prim_id, bool provoking_vtx_last, + bool use_edgeflags, uint32_t instance_rate_inputs) { nir_function_impl *impl = nir_shader_get_entrypoint(shader); @@ -1276,6 +1279,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, .passthrough = passthrough, .export_prim_id = export_prim_id, .early_prim_export = exec_list_is_singular(&impl->body), + .use_edgeflags = use_edgeflags, .num_vertices_per_primitives = num_vertices_per_primitives, .provoking_vtx_idx = provoking_vtx_last ? (num_vertices_per_primitives - 1) : 0, .position_value_var = position_value_var, @@ -1705,7 +1709,7 @@ ngg_gs_export_primitives(nir_builder *b, nir_ssa_def *max_num_out_prims, nir_ssa } } - nir_ssa_def *arg = emit_pack_ngg_prim_exp_arg(b, s->num_vertices_per_primitive, vtx_indices, is_null_prim); + nir_ssa_def *arg = emit_pack_ngg_prim_exp_arg(b, s->num_vertices_per_primitive, vtx_indices, is_null_prim, false); nir_build_export_primitive_amd(b, arg); nir_pop_if(b, if_prim_export_thread); } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 03a3ff7..2bddfd5 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -982,6 +982,7 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir, key->vs_common_out.as_ngg_passthrough, key->vs_common_out.export_prim_id, key->vs.provoking_vtx_last, + false, key->vs.instance_rate_inputs); info->has_ngg_culling = out_conf.can_cull; -- 2.7.4