}
ALWAYS_INLINE static bool
-radv_skip_ngg_culling(bool has_tess, const unsigned vtx_cnt,
- bool indirect)
+radv_skip_ngg_culling(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_graphics_pipeline *pipeline,
+ const struct radv_draw_info *draw_info)
{
+ const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+
/* If we have to draw only a few vertices, we get better latency if
* we disable NGG culling.
*
* When tessellation is used, what matters is the number of tessellated
* vertices, so let's always assume it's not a small draw.
*/
- return !has_tess && !indirect && vtx_cnt < 128;
+ if (pipeline->last_vgt_api_stage != MESA_SHADER_VERTEX)
+ return false;
+
+ if (!draw_info->indirect && draw_info->count < 128)
+ return true;
+
+ /* With graphics pipeline library, NGG culling is enabled unconditionally because we don't know
+ * the primitive topology at compile time, but we should still disable it dynamically for points
+ * or lines.
+ *
+ * The value returned by si_conv_prim_to_gs_out() is one less than the number of vertices per
+ * primitive (point = 0, line = 1, triangle = 2), hence the + 1 below. Culling is skipped for
+ * every dynamic topology that does not resolve to exactly 3 vertices per primitive.
+ */
+ unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(d->primitive_topology) + 1;
+ if (num_vertices_per_prim != 3)
+ return true;
+
+ return false;
}
ALWAYS_INLINE static uint32_t
/* Check small draw status:
* For small draw calls, we disable culling by setting the SGPR to 0.
*/
- const bool skip =
- radv_skip_ngg_culling(stage == MESA_SHADER_TESS_EVAL, draw_info->count, draw_info->indirect);
+ const bool skip = radv_skip_ngg_culling(cmd_buffer, pipeline, draw_info);
/* See if anything changed. */
if (!dirty && skip == cmd_buffer->state.last_nggc_skip)
return radv_rast_prim_is_point(rast_prim) || radv_rast_prim_is_line(rast_prim);
}
+static inline unsigned
+radv_get_num_vertices_per_prim(const struct radv_pipeline_key *pipeline_key)
+{
+ if (pipeline_key->vs.topology == V_008958_DI_PT_NONE) {
+ /* When the topology is unknown (with graphics pipeline library), return the maximum number of
+ * vertices per primitive for VS. This is used to lower NGG (the HW will ignore the extra
+ * bits for points/lines) and also to enable NGG culling unconditionally (it will be disabled
+ * dynamically for points/lines).
+ */
+ return 3;
+ } else {
+ /* The topology is known: convert it to the GS output primitive type and add 1, because: V_028A6C_POINTLIST=0, V_028A6C_LINESTRIP=1, V_028A6C_TRISTRIP=2, etc. */
+ return si_conv_prim_to_gs_out(pipeline_key->vs.topology) + 1;
+ }
+}
+
static inline uint32_t
si_translate_stencil_op(enum VkStencilOp op)
{
BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
} else if (nir->info.stage == MESA_SHADER_VERTEX) {
- if (pl_key->vs.topology == V_008958_DI_PT_NONE) {
- /* When the topology is unknown (with graphics pipeline library), use the maximum number of
- * vertices per primitives for simplicity, the HW will ignore the extra bits if points or
- * lines are used anyways.
- */
- num_vertices_per_prim = 3;
- } else {
- /* Need to add 1, because: V_028A6C_POINTLIST=0, V_028A6C_LINESTRIP=1, V_028A6C_TRISTRIP=2, etc. */
- num_vertices_per_prim = si_conv_prim_to_gs_out(pl_key->vs.topology) + 1;
- }
+ num_vertices_per_prim = radv_get_num_vertices_per_prim(pl_key);
/* Manually mark the instance ID used, so the shader can repack it. */
if (pl_key->vs.instance_rate_inputs)
uint64_t ps_inputs_read = fs_stage->nir->info.inputs_read;
- unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(pipeline_key->vs.topology) + 1;
- if (es_stage->stage == MESA_SHADER_TESS_EVAL) {
+ unsigned num_vertices_per_prim = 0;
+ if (es_stage->stage == MESA_SHADER_VERTEX) {
+ num_vertices_per_prim = radv_get_num_vertices_per_prim(pipeline_key);
+ } else if (es_stage->stage == MESA_SHADER_TESS_EVAL) {
num_vertices_per_prim = es_stage->nir->info.tess.point_mode ? 1 :
es_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2 : 3;
}