unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
unsigned medium_precision:1; /* GLSL mediump qualifier */
unsigned per_view:1;
- unsigned _pad:7;
+ unsigned high_16bits:1; /* whether accessing low or high half of the slot */
+ unsigned _pad:6;
} nir_io_semantics;
#define NIR_INTRINSIC_MAX_INPUTS 11
void *dead_ctx)
{
uint64_t slot_mask = 0;
+ uint16_t slot_mask_16bit = 0;
if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {
nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
- if (semantics.location >= VARYING_SLOT_PATCH0) {
+ if (semantics.location >= VARYING_SLOT_PATCH0 &&
+ semantics.location <= VARYING_SLOT_PATCH31) {
/* Generic per-patch I/O. */
assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&
instr->intrinsic == nir_intrinsic_load_input) ||
semantics.location -= VARYING_SLOT_PATCH0;
}
- slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
- assert(util_bitcount64(slot_mask) == semantics.num_slots);
+ if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&
+ semantics.location <= VARYING_SLOT_VAR15_16BIT) {
+ /* Convert num_slots from the units of half vectors to full vectors. */
+ unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2;
+ slot_mask_16bit =
+ BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots);
+ } else {
+ slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
+ assert(util_bitcount64(slot_mask) == semantics.num_slots);
+ }
}
switch (instr->intrinsic) {
shader->info.patch_inputs_read_indirectly |= slot_mask;
} else {
shader->info.inputs_read |= slot_mask;
- if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
+ shader->info.inputs_read_16bit |= slot_mask_16bit;
+ if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
shader->info.inputs_read_indirectly |= slot_mask;
+ shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit;
+ }
}
if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
shader->info.patch_outputs_accessed_indirectly |= slot_mask;
} else {
shader->info.outputs_read |= slot_mask;
- if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
+ shader->info.outputs_read_16bit |= slot_mask_16bit;
+ if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
shader->info.outputs_accessed_indirectly |= slot_mask;
+ shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
+ }
}
if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
shader->info.patch_outputs_accessed_indirectly |= slot_mask;
} else {
shader->info.outputs_written |= slot_mask;
- if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
+ shader->info.outputs_written_16bit |= slot_mask_16bit;
+ if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
shader->info.outputs_accessed_indirectly |= slot_mask;
+ shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
+ }
}
if (shader->info.stage == MESA_SHADER_FRAGMENT &&
shader->info.inputs_read = 0;
shader->info.outputs_written = 0;
shader->info.outputs_read = 0;
+ shader->info.inputs_read_16bit = 0;
+ shader->info.outputs_written_16bit = 0;
+ shader->info.outputs_read_16bit = 0;
+ shader->info.inputs_read_indirectly_16bit = 0;
+ shader->info.outputs_accessed_indirectly_16bit = 0;
shader->info.patch_outputs_read = 0;
shader->info.patch_inputs_read = 0;
shader->info.patch_outputs_written = 0;
}
fprintf(fp, ")");
}
- if (state->shader->info.stage == MESA_SHADER_FRAGMENT &&
- nir_intrinsic_io_semantics(instr).medium_precision) {
+ if (nir_intrinsic_io_semantics(instr).medium_precision) {
fprintf(fp, " mediump");
}
+ if (nir_intrinsic_io_semantics(instr).high_16bits) {
+ fprintf(fp, " high_16bits");
+ }
}
break;
VARYING_SLOT_VAR29,
VARYING_SLOT_VAR30,
VARYING_SLOT_VAR31,
+ /* Per-patch varyings for tessellation. */
+ VARYING_SLOT_PATCH0,
+ VARYING_SLOT_PATCH1,
+ VARYING_SLOT_PATCH2,
+ VARYING_SLOT_PATCH3,
+ VARYING_SLOT_PATCH4,
+ VARYING_SLOT_PATCH5,
+ VARYING_SLOT_PATCH6,
+ VARYING_SLOT_PATCH7,
+ VARYING_SLOT_PATCH8,
+ VARYING_SLOT_PATCH9,
+ VARYING_SLOT_PATCH10,
+ VARYING_SLOT_PATCH11,
+ VARYING_SLOT_PATCH12,
+ VARYING_SLOT_PATCH13,
+ VARYING_SLOT_PATCH14,
+ VARYING_SLOT_PATCH15,
+ VARYING_SLOT_PATCH16,
+ VARYING_SLOT_PATCH17,
+ VARYING_SLOT_PATCH18,
+ VARYING_SLOT_PATCH19,
+ VARYING_SLOT_PATCH20,
+ VARYING_SLOT_PATCH21,
+ VARYING_SLOT_PATCH22,
+ VARYING_SLOT_PATCH23,
+ VARYING_SLOT_PATCH24,
+ VARYING_SLOT_PATCH25,
+ VARYING_SLOT_PATCH26,
+ VARYING_SLOT_PATCH27,
+ VARYING_SLOT_PATCH28,
+ VARYING_SLOT_PATCH29,
+ VARYING_SLOT_PATCH30,
+ VARYING_SLOT_PATCH31,
+ /* 32 16-bit vec4 slots packed in 16 32-bit vec4 slots for GLES/mediump.
+ * They are really just additional generic slots used for 16-bit data to
+ * prevent conflicts between neighboring mediump and non-mediump varyings
+ * that can't be packed without breaking one or the other, which is
+ * a limitation of separate shaders. This allows linking shaders in 32 bits
+ * and then getting optimally packed 16-bit varyings by remapping the IO
+ * locations to these slots. The remapping can also be undone trivially.
+ *
+ * nir_io_semantics::high_16bits determines which half of the slot is
+ * accessed. The low and high halves share the same IO "base" number.
+ * Drivers can treat these as 32-bit slots everywhere except for FP16
+ * interpolation.
+ */
+ VARYING_SLOT_VAR0_16BIT,
+ VARYING_SLOT_VAR1_16BIT,
+ VARYING_SLOT_VAR2_16BIT,
+ VARYING_SLOT_VAR3_16BIT,
+ VARYING_SLOT_VAR4_16BIT,
+ VARYING_SLOT_VAR5_16BIT,
+ VARYING_SLOT_VAR6_16BIT,
+ VARYING_SLOT_VAR7_16BIT,
+ VARYING_SLOT_VAR8_16BIT,
+ VARYING_SLOT_VAR9_16BIT,
+ VARYING_SLOT_VAR10_16BIT,
+ VARYING_SLOT_VAR11_16BIT,
+ VARYING_SLOT_VAR12_16BIT,
+ VARYING_SLOT_VAR13_16BIT,
+ VARYING_SLOT_VAR14_16BIT,
+ VARYING_SLOT_VAR15_16BIT,
+
+ NUM_TOTAL_VARYING_SLOTS,
} gl_varying_slot;
#define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING)
-#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
#define MAX_VARYINGS_INCL_PATCH (VARYING_SLOT_TESS_MAX - VARYING_SLOT_VAR0)
/* Which system values are actually read */
BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
+ /* Which 16-bit inputs and outputs are used corresponding to
+ * VARYING_SLOT_VARn_16BIT.
+ */
+ uint16_t inputs_read_16bit;
+ uint16_t outputs_written_16bit;
+ uint16_t outputs_read_16bit;
+ uint16_t inputs_read_indirectly_16bit;
+ uint16_t outputs_accessed_indirectly_16bit;
+
/* Which patch inputs are actually read */
uint32_t patch_inputs_read;
/* Which patch outputs are actually written */