for (i = 0; i < noutput; i++) {
semantic_name = outputs[i].name;
semantic_index = outputs[i].sid;
+ bool export_param = true;
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_POSITION: /* ignore these */
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ case TGSI_SEMANTIC_EDGEFLAG:
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ case TGSI_SEMANTIC_CLIPDIST:
+ if (shader->key.opt.hw_vs.kill_outputs &
+ (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
+ export_param = false;
+ break;
+ default:
+ if (shader->key.opt.hw_vs.kill_outputs2 &
+ (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
+ export_param = false;
+ break;
+ }
handle_semantic:
/* Select the correct target */
break;
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
+ if (!export_param)
+ continue;
target = V_008DFC_SQ_EXP_PARAM + param_count;
assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
shader->info.vs_output_param_offset[i] = param_count;
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
+ if (!export_param)
+ continue;
target = V_008DFC_SQ_EXP_PARAM + param_count;
assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
shader->info.vs_output_param_offset[i] = param_count;
si_init_shader_ctx(&ctx, sscreen, shader, tm);
ctx.separate_prolog = !is_monolithic;
- memset(shader->info.vs_output_param_offset, 0xff,
+ memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
sizeof(shader->info.vs_output_param_offset));
shader->info.uses_instanceid = sel->info.uses_instanceid;
/* Common VS bits between the shader key and the epilog key. */
struct si_vs_epilog_bits {
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
- /* TODO:
- * - skip layer, viewport, clipdist, and culldist parameter exports
- * if PS doesn't read them
- */
};
/* Common TCS bits between the shader key and the epilog key. */
/* Optimization flags for asynchronous compilation only. */
union {
struct {
+ uint64_t kill_outputs; /* "get_unique_index" bits: written outputs not linked to an input read; a set bit clears export_param for that output */
+ uint32_t kill_outputs2; /* "get_unique_index2" bits: same meaning as kill_outputs, for semantics indexed by get_unique_index2 */
unsigned clip_disable:1;
} hw_vs; /* HW VS (it can be VS, TES, GS) */
} opt;
EXP_PARAM_DEFAULT_VAL_0001,
EXP_PARAM_DEFAULT_VAL_1110,
EXP_PARAM_DEFAULT_VAL_1111,
+ EXP_PARAM_UNDEFINED = 255,
};
/* GCN-specific shader info. */
struct si_shader_selector *vs,
struct si_shader_key *key)
{
+ struct si_shader_selector *ps = sctx->ps_shader.cso;
+
key->opt.hw_vs.clip_disable =
sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
(vs->info.clipdist_writemask ||
vs->info.writes_clipvertex) &&
!vs->info.culldist_writemask;
+
+ /* Find out if PS is disabled. */
+ bool ps_disabled = ps == NULL;
+
+ /* Find out which VS outputs aren't used by the PS. */
+ uint64_t outputs_written = vs->outputs_written;
+ uint32_t outputs_written2 = vs->outputs_written2;
+ uint64_t inputs_read = 0;
+ uint32_t inputs_read2 = 0;
+
+ outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
+
+ if (!ps_disabled) {
+ inputs_read = ps->inputs_read;
+ inputs_read2 = ps->inputs_read2;
+ }
+
+ uint64_t linked = outputs_written & inputs_read;
+ uint32_t linked2 = outputs_written2 & inputs_read2;
+
+ key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
+ key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
}
/* Compute the key for the hw shader variant */
/* The input is loaded from parameter memory. */
ps_input_cntl |= S_028644_OFFSET(offset);
} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
- /* The input is a DEFAULT_VAL constant. */
- assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
- offset <= EXP_PARAM_DEFAULT_VAL_1111);
+ if (offset == EXP_PARAM_UNDEFINED) {
+ /* This can happen with depth-only rendering. */
+ offset = 0;
+ } else {
+ /* The input is a DEFAULT_VAL constant. */
+ assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
+ offset <= EXP_PARAM_DEFAULT_VAL_1111);
+ offset -= EXP_PARAM_DEFAULT_VAL_0000;
+ }
- offset -= EXP_PARAM_DEFAULT_VAL_0000;
ps_input_cntl = S_028644_OFFSET(0x20) |
S_028644_DEFAULT_VAL(offset);
}