ac/nir/nggc: Move gs_alloc_req up in NGG culling shaders.

Author:      Timur Kristóf <timur.kristof@gmail.com>
Author date: Mon, 2 Aug 2021 15:57:39 +0000 (17:57 +0200)
Committer:   Marge Bot <eric+marge@anholt.net>
Commit date: Wed, 1 Sep 2021 14:45:14 +0000 (14:45 +0000)

diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index 223d0d71fc63a85fba7196edb3a11e3263be1ca7..6e63e153fb8a58215ff848ba75df5904596999a2 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -645,6 +645,9 @@ compact_vertices_after_culling(nir_builder *b,
                                 nir_variable **gs_vtxaddr_vars,
                                 nir_ssa_def *invocation_index,
                                 nir_ssa_def *es_vertex_lds_addr,
+                               nir_ssa_def *es_exporter_tid,
+                               nir_ssa_def *num_live_vertices_in_workgroup,
+                               nir_ssa_def *fully_culled,
                                 unsigned ngg_scratch_lds_base_addr,
                                 unsigned pervertex_lds_bytes,
                                 unsigned max_exported_args)
@@ -654,15 +657,7 @@ compact_vertices_after_culling(nir_builder *b,
     nir_variable *position_value_var = nogs_state->position_value_var;
     nir_variable *prim_exp_arg_var = nogs_state->prim_exp_arg_var;
  
-   nir_ssa_def *es_accepted = nir_load_var(b, es_accepted_var);
-
-   /* Repack the vertices that survived the culling. */
-   wg_repack_result rep = repack_invocations_in_workgroup(b, es_accepted, ngg_scratch_lds_base_addr,
-                                                          nogs_state->max_num_waves, nogs_state->wave_size);
-   nir_ssa_def *num_live_vertices_in_workgroup = rep.num_repacked_invocations;
-   nir_ssa_def *es_exporter_tid = rep.repacked_invocation_index;
-
-   nir_if *if_es_accepted = nir_push_if(b, es_accepted);
+   nir_if *if_es_accepted = nir_push_if(b, nir_load_var(b, es_accepted_var));
     {
        nir_ssa_def *exporter_addr = pervertex_lds_addr(b, es_exporter_tid, pervertex_lds_bytes);
  
@@ -683,20 +678,6 @@ compact_vertices_after_culling(nir_builder *b,
     }
     nir_pop_if(b, if_es_accepted);
  
-   /* If all vertices are culled, set primitive count to 0 as well. */
-   nir_ssa_def *num_exported_prims = nir_build_load_workgroup_num_input_primitives_amd(b);
-   nir_ssa_def *fully_culled = nir_ieq_imm(b, num_live_vertices_in_workgroup, 0u);
-   num_exported_prims = nir_bcsel(b, fully_culled, nir_imm_int(b, 0u), num_exported_prims);
-
-   nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0)));
-   {
-      /* Tell the final vertex and primitive count to the HW.
-       * We do this here to mask some of the latency of the LDS.
-       */
-      nir_build_alloc_vertices_and_primitives_amd(b, num_live_vertices_in_workgroup, num_exported_prims);
-   }
-   nir_pop_if(b, if_wave_0);
-
     /* TODO: Consider adding a shortcut exit.
      * Waves that have no vertices and primitives left can s_endpgm right here.
      */
@@ -1143,10 +1124,31 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
        }
        nir_pop_if(b, if_es_thread);
  
+      nir_ssa_def *es_accepted = nir_load_var(b, es_accepted_var);
+
+      /* Repack the vertices that survived the culling. */
+      wg_repack_result rep = repack_invocations_in_workgroup(b, es_accepted, ngg_scratch_lds_base_addr,
+                                                            nogs_state->max_num_waves, nogs_state->wave_size);
+      nir_ssa_def *num_live_vertices_in_workgroup = rep.num_repacked_invocations;
+      nir_ssa_def *es_exporter_tid = rep.repacked_invocation_index;
+
+      /* If all vertices are culled, set primitive count to 0 as well. */
+      nir_ssa_def *num_exported_prims = nir_build_load_workgroup_num_input_primitives_amd(b);
+      nir_ssa_def *fully_culled = nir_ieq_imm(b, num_live_vertices_in_workgroup, 0u);
+      num_exported_prims = nir_bcsel(b, fully_culled, nir_imm_int(b, 0u), num_exported_prims);
+
+      nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0)));
+      {
+         /* Tell the final vertex and primitive count to the HW. */
+         nir_build_alloc_vertices_and_primitives_amd(b, num_live_vertices_in_workgroup, num_exported_prims);
+      }
+      nir_pop_if(b, if_wave_0);
+
        /* Vertex compaction. */
        compact_vertices_after_culling(b, nogs_state,
                                       repacked_arg_vars, gs_vtxaddr_vars,
                                       invocation_index, es_vertex_lds_addr,
+                                     es_exporter_tid, num_live_vertices_in_workgroup, fully_culled,
                                       ngg_scratch_lds_base_addr, pervertex_lds_bytes, max_exported_args);
     }
     nir_push_else(b, if_cull_en);
author	Timur Kristóf <timur.kristof@gmail.com>
	Mon, 2 Aug 2021 15:57:39 +0000 (17:57 +0200)
committer	Marge Bot <eric+marge@anholt.net>
	Wed, 1 Sep 2021 14:45:14 +0000 (14:45 +0000)