This helps some viewperf subtests.
Only view XY culling and W culling are done for lines; face, zero-area, and small-primitive culling remain triangle-only. Edgeflags are always disabled with lines.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13048>
};
static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
- struct ac_position_w_info *w)
+ struct ac_position_w_info *w, unsigned num_vertices)
{
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef all_w_negative = ctx->i1true;
w->w_reflection = ctx->i1false;
w->any_w_negative = ctx->i1false;
- for (unsigned i = 0; i < 3; i++) {
+ for (unsigned i = 0; i < num_vertices; i++) {
LLVMValueRef neg_w;
neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
/* Compute the primitive bounding box for easy culling. */
for (unsigned chan = 0; chan < (options->cull_view_near_z ||
options->cull_view_far_z ? 3 : 2); chan++) {
+ assert(options->num_vertices >= 2);
bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
- bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
-
bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
- bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
+
+ if (options->num_vertices == 3) {
+ bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
+ bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
+ }
}
/* View culling. */
void *userdata)
{
struct ac_position_w_info w;
- ac_analyze_position_w(ctx, pos, &w);
+ ac_analyze_position_w(ctx, pos, &w, options->num_vertices);
/* W culling. */
LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
bool cull_w; /* cull primitives with all W < 0 */
bool use_halfz_clip_space;
+
+ uint8_t num_vertices; /* 2..3 (2 = lines, 3 = triangles) */
};
/* Callback invoked in the inner-most branch where the primitive is accepted. */
/* Blits always use axis-aligned rectangles with 3 vertices. */
*num_vertices = 3;
return LLVMConstInt(ctx->ac.i32, 3, 0);
+ } else if (ctx->shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
+ *num_vertices = 2;
+ return LLVMConstInt(ctx->ac.i32, 2, 0);
} else {
/* We always build up all three indices for the prim export
* independent of the primitive type. The additional garbage
/* Execute culling code. */
struct ac_cull_options options = {};
- options.cull_front = shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
- options.cull_back = shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
options.cull_view_xy = true;
- options.cull_small_prims = true; /* this would only be false with conservative rasterization */
- options.cull_zero_area = options.cull_front || options.cull_back;
options.cull_w = true;
+ if (shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
+ options.num_vertices = 2;
+
+ assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
+ assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
+ assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL));
+ } else {
+ options.num_vertices = 3;
+ options.cull_front = shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
+ options.cull_back = shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
+ options.cull_small_prims = true; /* this would only be false with conservative rasterization */
+ options.cull_zero_area = options.cull_front || options.cull_back;
+ }
+
/* Tell ES threads whether their vertex survived. */
LLVMValueRef params[] = {
gs_accepted,
shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel;
const gl_shader_stage gs_stage = gs_sel->info.stage;
const unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
- const unsigned input_prim = si_get_input_prim(gs_sel);
+ const unsigned input_prim = si_get_input_prim(gs_sel, &shader->key);
const bool use_adjacency =
input_prim >= PIPE_PRIM_LINES_ADJACENCY && input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
((1 << PIPE_PRIM_LINES) | (1 << PIPE_PRIM_LINE_LOOP) | (1 << PIPE_PRIM_LINE_STRIP) | \
(1 << PIPE_PRIM_LINES_ADJACENCY) | (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY))
+#define UTIL_ALL_PRIM_TRIANGLE_MODES \
+ ((1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) | \
+ (1 << PIPE_PRIM_TRIANGLE_FAN) | (1 << PIPE_PRIM_QUADS) | (1 << PIPE_PRIM_QUAD_STRIP) | \
+ (1 << PIPE_PRIM_POLYGON) | (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) | \
+ (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY))
+
static inline bool util_prim_is_lines(unsigned prim)
{
return ((1 << prim) & UTIL_ALL_PRIM_LINE_MODES) != 0;
static inline bool util_rast_prim_is_triangles(unsigned prim)
{
- return ((1 << prim) &
- ((1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) |
- (1 << PIPE_PRIM_TRIANGLE_FAN) | (1 << PIPE_PRIM_QUADS) | (1 << PIPE_PRIM_QUAD_STRIP) |
- (1 << PIPE_PRIM_POLYGON) | (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |
- (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY)));
+ return ((1 << prim) & UTIL_ALL_PRIM_TRIANGLE_MODES) != 0;
+}
+
+static inline bool util_rast_prim_is_lines_or_triangles(unsigned prim)
+{
+ return ((1 << prim) & (UTIL_ALL_PRIM_LINE_MODES | UTIL_ALL_PRIM_TRIANGLE_MODES)) != 0;
}
/**
#define SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(x) (((x) & 0x3) << 5) /* 0->0, 1->1, 2->2, 3->4 */
#define SI_GET_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(x) (((x) >> 5) & 0x3)
#define SI_NGG_CULL_GS_FAST_LAUNCH_ALL (0xf << 3) /* GS fast launch (both prim types) */
+#define SI_NGG_CULL_LINES (1 << 7) /* the primitive type is lines */
/**
* For VS shader keys, describe any fixups required for vertex fetch.
unsigned kill_pointsize : 1;
/* For NGG VS and TES. */
- unsigned ngg_culling : 7; /* SI_NGG_CULL_* */
+ unsigned ngg_culling : 8; /* SI_NGG_CULL_* */
/* For shaders where monolithic variants have better code.
*
static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader)
{
if (shader->selector->info.stage == MESA_SHADER_VERTEX &&
- !shader->selector->info.base.vs.blit_sgprs_amd)
+ !shader->selector->info.base.vs.blit_sgprs_amd &&
+ !(shader->key.opt.ngg_culling & SI_NGG_CULL_LINES))
return true;
return false;
void si_update_ps_inputs_read_or_disabled(struct si_context *sctx);
void si_update_ps_kill_enable(struct si_context *sctx);
void si_update_vrs_flat_shading(struct si_context *sctx);
-unsigned si_get_input_prim(const struct si_shader_selector *gs);
+unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key);
bool si_update_ngg(struct si_context *sctx);
void si_ps_key_update_framebuffer(struct si_context *sctx);
void si_ps_key_update_framebuffer_blend(struct si_context *sctx);
if (NGG && !HAS_GS &&
/* Tessellation sets ngg_cull_vert_threshold to UINT_MAX if the prim type
- * is not triangles, so this check is only needed without tessellation. */
- (HAS_TESS || sctx->current_rast_prim == PIPE_PRIM_TRIANGLES) &&
+ * is points, so this check is only needed without tessellation. */
+ (HAS_TESS || util_rast_prim_is_lines_or_triangles(sctx->current_rast_prim)) &&
/* Only the first draw for a shader starts with culling disabled and it's disabled
* until we pass the total_direct_count check and then it stays enabled until
* the shader is changed. This eliminates most culling on/off state changes. */
rs->ngg_cull_flags;
assert(ngg_culling); /* rasterizer state should always set this to non-zero */
+ if (util_prim_is_lines(sctx->current_rast_prim)) {
+ /* Overwrite it to mask out face cull flags. */
+ ngg_culling = SI_NGG_CULL_ENABLED | SI_NGG_CULL_LINES;
+ }
+
/* Use NGG fast launch for certain primitive types.
* A draw must have at least 1 full primitive.
* The fast launch doesn't work with tessellation.
gfx10_emit_shader_ngg_tail(sctx, shader);
}
-unsigned si_get_input_prim(const struct si_shader_selector *gs)
+unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key)
{
if (gs->info.stage == MESA_SHADER_GEOMETRY)
return gs->info.base.gs.input_primitive;
return PIPE_PRIM_TRIANGLES;
}
- /* TODO: Set this correctly if the primitive type is set in the shader key. */
+ if (key->opt.ngg_culling & SI_NGG_CULL_LINES)
+ return PIPE_PRIM_LINES;
+
return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
}
gs_info->base.vs.window_space_position : 0;
bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid;
unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
- unsigned input_prim = si_get_input_prim(gs_sel);
+ unsigned input_prim = si_get_input_prim(gs_sel, &shader->key);
bool break_wave_at_eoi = false;
struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
sel->ngg_cull_vert_threshold = 128;
}
} else if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
- if (sel->rast_prim == PIPE_PRIM_TRIANGLES &&
+ if (sel->rast_prim != PIPE_PRIM_POINTS &&
(sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
sscreen->info.chip_class == GFX10_3))