ubyte input_semantic[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
+ ubyte input_fp16_lo_hi_valid[PIPE_MAX_SHADER_INPUTS]; /* per input: 0x1 = 16-bit load without high_16bits, 0x2 = 16-bit load with high_16bits */
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
- char output_semantic_to_slot[VARYING_SLOT_TESS_MAX];
+ char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1]; /* indexed by semantic; sized so VARYING_SLOT_VAR15_16BIT is a valid index */
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
/* Record this input only when at least one component is used (mask != 0). */
if (mask) {
info->input_usage_mask[loc] |= mask;
+ if (bit_size == 16) { /* only 16-bit loads mark a half as valid */
+ if (nir_intrinsic_io_semantics(intr).high_16bits)
+ info->input_fp16_lo_hi_valid[loc] |= 0x2; /* load has high_16bits set */
+ else
+ info->input_fp16_lo_hi_valid[loc] |= 0x1; /* load has high_16bits clear */
+ }
info->num_inputs = MAX2(info->num_inputs, loc + 1); /* num_inputs is the highest used location + 1 */
}
}
NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
}
/* The removed lines ran nir_lower_mediump_io only for fragment shaders on
 * hardware with packed 16-bit math, and only on shader outputs. The added
 * lines run it whenever the fragment-stage FP16 cap is reported, and also on
 * shader inputs for every stage except the vertex stage.
 */
- if (nir->info.stage == MESA_SHADER_FRAGMENT &&
- sscreen->info.has_packed_math_16bit &&
- sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16))
- NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, 0, false);
+ if (sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16)) { /* the cap is queried for the fragment stage regardless of nir->info.stage */
+ NIR_PASS_V(nir, nir_lower_mediump_io,
+ /* TODO: LLVM fails to compile this test if VS inputs are 16-bit:
+ * dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry
+ */
+ (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) | nir_var_shader_out,
+ BITFIELD64_BIT(VARYING_SLOT_PNTC) | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32), /* slot mask: PNTC plus, presumably, 32 slots starting at VAR0 - confirm BITFIELD64_RANGE(start, count) */
+ true); /* NOTE(review): meaning of this flag is defined by nir_lower_mediump_io; the removed call passed false */
+ }
si_nir_opts(sscreen, nir, true);
}
/* Compute the ps_input_cntl value for one PS input identified by `semantic`.
 *
 * fp16_lo_hi_mask: the caller passes psinfo->input_fp16_lo_hi_valid[i] for
 * regular inputs (0x1 / 0x2 bits); a zero mask leaves all FP16 fields unset.
 */
static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *vs,
- unsigned semantic, enum glsl_interp_mode interpolate)
+ unsigned semantic, enum glsl_interp_mode interpolate,
+ ubyte fp16_lo_hi_mask)
{
struct si_shader_info *vsinfo = &vs->selector->info;
unsigned offset, ps_input_cntl = 0;
(semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7 &&
sctx->sprite_coord_enable & (1 << (semantic - VARYING_SLOT_TEX0)))) {
ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
+ if (fp16_lo_hi_mask & 0x1) { /* point-sprite path: only the 0x1 bit is honored, ATTR1 is never enabled here */
+ ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
+ S_028644_ATTR0_VALID(1);
+ }
}
int vs_slot = vsinfo->output_semantic_to_slot[semantic]; /* VS output slot looked up by semantic */
ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
}
+
+ if (fp16_lo_hi_mask && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { /* non-sprite inputs with any 16-bit half in use */
+ assert(offset <= AC_EXP_PARAM_OFFSET_31 || offset == AC_EXP_PARAM_DEFAULT_VAL_0000);
+
+ ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
+ S_028644_USE_DEFAULT_ATTR1(offset == AC_EXP_PARAM_DEFAULT_VAL_0000) | /* set only when offset is the DEFAULT_VAL_0000 value */
+ S_028644_DEFAULT_VAL_ATTR1(0) |
+ S_028644_ATTR0_VALID(1) | /* this must be set if FP16_INTERP_MODE is set */
+ S_028644_ATTR1_VALID(!!(fp16_lo_hi_mask & 0x2)); /* ATTR1 is valid only if the 0x2 bit was recorded */
+ }
} else {
/* VS output not found. */
if (semantic == VARYING_SLOT_PRIMITIVE_ID) {
/* Emit one ps_input_cntl entry per PS input, in input order. */
for (i = 0; i < psinfo->num_inputs; i++) {
unsigned semantic = psinfo->input_semantic[i];
unsigned interpolate = psinfo->input_interpolate[i];
+ ubyte fp16_lo_hi_mask = psinfo->input_fp16_lo_hi_valid[i]; /* 0x1 / 0x2 bits recorded for this input */
- spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate);
+ spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate,
+ fp16_lo_hi_mask);
}
/* With two-sided color enabled in the PS prolog key, emit an extra entry for
 * the back-face color slot (VARYING_SLOT_BFC0 + i). No fp16 lo/hi bits are
 * passed for it.
 */
if (ps->key.part.ps.prolog.color_two_side) {
unsigned semantic = VARYING_SLOT_BFC0 + i;
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic,
- psinfo->color_interpolate[i]);
+ psinfo->color_interpolate[i],
+ 0); /* fp16_lo_hi_mask is a ubyte bitmask, so pass an empty mask rather than the boolean false */
}
}
assert(num_interp == num_written);