ac/nir/ngg: Include culled primitives in query.
author Timur Kristóf <timur.kristof@gmail.com>
Wed, 1 Feb 2023 12:07:42 +0000 (13:07 +0100)
committer Eric Engestrom <eric@engestrom.ch>
Wed, 8 Feb 2023 20:34:44 +0000 (20:34 +0000)
Vulkan spec 18.8. Primitives Generated Queries:

When a generated primitive query for a vertex stream is active,
the primitives-generated count is incremented every time a
primitive emitted to that stream reaches the transform feedback
stage, whether or not transform feedback is active.

We can see the order of stages in chapter 27 Fixed-Function
Vertex Post-Processing, which shows that the transform feedback
stage is before rasterization (and therefore culling).

The conclusion is that culled primitives should be included
in the primitives generated query.

This commit makes sure to emit the primitives generated query
code before culling and uses the input primitive count passed
to the current wave instead of the exec mask after culling.

Cc: mesa-stable
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21037>
(cherry picked from commit 3a819bd22ec5a7eea2e0e337977cbc0af9e38032)

.pick_status.json
src/amd/common/ac_nir_lower_ngg.c

index 699fafa..924a948 100644 (file)
         "description": "ac/nir/ngg: Include culled primitives in query.",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
index 909690f..901245b 100644 (file)
@@ -468,6 +468,29 @@ has_input_primitive(nir_builder *b)
 }
 
 static void
+nogs_prim_gen_query(nir_builder *b, lower_ngg_nogs_state *s)
+{
+   if (!s->options->has_gen_prim_query)
+      return;
+
+   nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b));
+   {
+      /* Activate only 1 lane and add the number of primitives to query result. */
+      nir_if *if_elected = nir_push_if(b, nir_elect(b, 1));
+      {
+         /* Number of input primitives in the current wave. */
+         nir_ssa_def *num_input_prims = nir_ubfe(b, nir_load_merged_wave_info_amd(b),
+                                                 nir_imm_int(b, 8), nir_imm_int(b, 8));
+
+         /* Add to stream 0 primitive generated counter. */
+         nir_atomic_add_gen_prim_count_amd(b, num_input_prims, .stream_id = 0);
+      }
+      nir_pop_if(b, if_elected);
+   }
+   nir_pop_if(b, if_shader_query);
+}
+
+static void
 emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg)
 {
    nir_ssa_def *gs_thread =
@@ -506,23 +529,6 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def
          arg = nir_iand(b, arg, mask);
       }
 
-      if (st->options->has_gen_prim_query) {
-         nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b));
-         {
-            /* Number of active GS threads. Each has 1 output primitive. */
-            nir_ssa_def *num_gs_threads =
-               nir_bit_count(b, nir_ballot(b, 1, st->options->wave_size, nir_imm_bool(b, true)));
-            /* Activate only 1 lane and add the number of primitives to query result. */
-            nir_if *if_elected = nir_push_if(b, nir_elect(b, 1));
-            {
-               /* Add to stream 0 primitive generated counter. */
-               nir_atomic_add_gen_prim_count_amd(b, num_gs_threads, .stream_id = 0);
-            }
-            nir_pop_if(b, if_elected);
-         }
-         nir_pop_if(b, if_shader_query);
-      }
-
       nir_export_primitive_amd(b, arg);
    }
    nir_pop_if(b, if_gs_thread);
@@ -1367,8 +1373,6 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
       nir_local_variable_create(impl, glsl_uint_type(), "repacked_arg_3"),
    };
 
-   b->cursor = nir_before_cf_list(&impl->body);
-
    if (nogs_state->options->clipdist_enable_mask ||
        nogs_state->options->user_clip_plane_enable_mask) {
       nogs_state->clip_vertex_var =
@@ -2153,6 +2157,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
 
    ngg_nogs_init_vertex_indices_vars(b, impl, &state);
 
+   /* Emit primitives generated query code here, so that
+    * it executes before culling and isn't in the extracted CF.
+    */
+   nogs_prim_gen_query(b, &state);
+
    if (!options->can_cull) {
       /* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. */
       if (!(options->passthrough && options->family >= CHIP_NAVI23)) {