radv: enable NGG culling unconditionally for GPL but disable it dynamically

author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 19 Sep 2022 16:51:20 +0000 (18:51 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 26 Sep 2022 07:28:14 +0000 (09:28 +0200)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c

index e4e1409..a2106df 100644 (file)
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -7063,16 +7063,33 @@ radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
  }
  
  ALWAYS_INLINE static bool
-radv_skip_ngg_culling(bool has_tess, const unsigned vtx_cnt,
-                      bool indirect)
+radv_skip_ngg_culling(struct radv_cmd_buffer *cmd_buffer,
+                      const struct radv_graphics_pipeline *pipeline,
+                      const struct radv_draw_info *draw_info)
  {
+   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+
     /* If we have to draw only a few vertices, we get better latency if
      * we disable NGG culling.
      *
      * When tessellation is used, what matters is the number of tessellated
      * vertices, so let's always assume it's not a small draw.
      */
-   return !has_tess && !indirect && vtx_cnt < 128;
+   if (pipeline->last_vgt_api_stage != MESA_SHADER_VERTEX)
+      return false;
+
+   if (!draw_info->indirect && draw_info->count < 128)
+      return true;
+
+   /* With graphics pipeline library, NGG culling is enabled unconditionally because we don't know
+    * the primitive topology at compile time, but we should still disable it dynamically for points
+    * or lines.
+    */
+   unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(d->primitive_topology) + 1;
+   if (num_vertices_per_prim != 3)
+      return true;
+
+   return false;
  }
  
  ALWAYS_INLINE static uint32_t
@@ -7148,8 +7165,7 @@ radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer, const struct rad
     /* Check small draw status:
      * For small draw calls, we disable culling by setting the SGPR to 0.
      */
-   const bool skip =
-      radv_skip_ngg_culling(stage == MESA_SHADER_TESS_EVAL, draw_info->count, draw_info->indirect);
+   const bool skip = radv_skip_ngg_culling(cmd_buffer, pipeline, draw_info);
  
     /* See if anything changed. */
     if (!dirty && skip == cmd_buffer->state.last_nggc_skip)
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h

index 4e05c42..ca0cd06 100644 (file)
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2958,6 +2958,22 @@ radv_rast_prim_is_points_or_lines(unsigned rast_prim)
     return radv_rast_prim_is_point(rast_prim) || radv_rast_prim_is_line(rast_prim);
  }
  
+static inline unsigned
+radv_get_num_vertices_per_prim(const struct radv_pipeline_key *pipeline_key)
+{
+   if (pipeline_key->vs.topology == V_008958_DI_PT_NONE) {
+      /* When the topology is unknown (with graphics pipeline library), return the maximum number of
+       * vertices per primitive for VS. This is used to lower NGG (the HW will ignore the extra
+       * bits for points/lines) and also to enable NGG culling unconditionally (it will be disabled
+       * dynamically for points/lines).
+       */
+      return 3;
+   } else {
+      /* Need to add 1, because: V_028A6C_POINTLIST=0, V_028A6C_LINESTRIP=1, V_028A6C_TRISTRIP=2, etc. */
+      return si_conv_prim_to_gs_out(pipeline_key->vs.topology) + 1;
+   }
+}
+
  static inline uint32_t
  si_translate_stencil_op(enum VkStencilOp op)
  {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c

index 0a1e55e..58f5d1e 100644 (file)
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1347,16 +1347,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
           BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
  
     } else if (nir->info.stage == MESA_SHADER_VERTEX) {
-      if (pl_key->vs.topology == V_008958_DI_PT_NONE) {
-         /* When the topology is unknown (with graphics pipeline library), use the maximum number of
-          * vertices per primitives for simplicity, the HW will ignore the extra bits if points or
-          * lines are used anyways.
-          */
-         num_vertices_per_prim = 3;
-      } else {
-         /* Need to add 1, because: V_028A6C_POINTLIST=0, V_028A6C_LINESTRIP=1, V_028A6C_TRISTRIP=2, etc. */
-         num_vertices_per_prim = si_conv_prim_to_gs_out(pl_key->vs.topology) + 1;
-      }
+      num_vertices_per_prim = radv_get_num_vertices_per_prim(pl_key);
  
        /* Manually mark the instance ID used, so the shader can repack it. */
        if (pl_key->vs.instance_rate_inputs)
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c

index 3ddcdaa..2b6463f 100644 (file)
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -1207,8 +1207,10 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_sta
  
     uint64_t ps_inputs_read = fs_stage->nir->info.inputs_read;
  
-   unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(pipeline_key->vs.topology) + 1;
-   if (es_stage->stage == MESA_SHADER_TESS_EVAL) {
+   unsigned num_vertices_per_prim = 0;
+   if (es_stage->stage == MESA_SHADER_VERTEX) {
+      num_vertices_per_prim = radv_get_num_vertices_per_prim(pipeline_key);
+   } else if (es_stage->stage == MESA_SHADER_TESS_EVAL) {
        num_vertices_per_prim = es_stage->nir->info.tess.point_mode ? 1 :
           es_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2 : 3;
     }
author	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Mon, 19 Sep 2022 16:51:20 +0000 (18:51 +0200)
committer	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Mon, 26 Sep 2022 07:28:14 +0000 (09:28 +0200)
src/amd/vulkan/radv_cmd_buffer.c		patch \| blob \| history
src/amd/vulkan/radv_private.h		patch \| blob \| history
src/amd/vulkan/radv_shader.c		patch \| blob \| history
src/amd/vulkan/radv_shader_info.c		patch \| blob \| history