system_value("tess_factor_base_ir3", 2)
system_value("tess_param_base_ir3", 2)
system_value("tcs_header_ir3", 1)
+system_value("rel_patch_id_ir3", 1)
# System values for freedreno compute shaders.
system_value("subgroup_id_shift_ir3", 1)
ENUM(SYSTEM_VALUE_RAY_GEOMETRY_INDEX),
ENUM(SYSTEM_VALUE_GS_HEADER_IR3),
ENUM(SYSTEM_VALUE_TCS_HEADER_IR3),
+ ENUM(SYSTEM_VALUE_REL_PATCH_ID_IR3),
ENUM(SYSTEM_VALUE_FRAG_SHADING_RATE),
};
STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
SYSTEM_VALUE_GS_HEADER_IR3,
SYSTEM_VALUE_TCS_HEADER_IR3,
+ /* IR3 specific system value that contains the patch id for the current
+ * subdraw.
+ */
+ SYSTEM_VALUE_REL_PATCH_ID_IR3,
+
/**
* Fragment shading rate used for KHR_fragment_shading_rate (Vulkan).
*/
dEQP-VK.api.info.image_format_properties.2d.optimal.g8_b8_r8_3plane_420_unorm,Fail
dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite,Fail
dEQP-VK.graphicsfuzz.spv-stable-pillars-volatile-nontemporal-store,Fail
-dEQP-VK.pipeline.misc.primitive_id_from_tess,Fail
dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_strides,Fail
dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_store_cols,Fail
dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_comp_access_store_cols,Fail
dst[0] = ctx->tcs_header;
break;
+ case nir_intrinsic_load_rel_patch_id_ir3:
+ dst[0] = ctx->rel_patch_id;
+ break;
+
case nir_intrinsic_load_primitive_id:
+ if (!ctx->primitive_id) {
+ ctx->primitive_id =
+ create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+ }
dst[0] = ctx->primitive_id;
break;
if (has_tess) {
ctx->tcs_header =
create_sysval_input(ctx, SYSTEM_VALUE_TCS_HEADER_IR3, 0x1);
+ ctx->rel_patch_id =
+ create_sysval_input(ctx, SYSTEM_VALUE_REL_PATCH_ID_IR3, 0x1);
ctx->primitive_id =
create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
} else if (has_gs) {
case MESA_SHADER_TESS_CTRL:
ctx->tcs_header =
create_sysval_input(ctx, SYSTEM_VALUE_TCS_HEADER_IR3, 0x1);
- ctx->primitive_id =
- create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+ ctx->rel_patch_id =
+ create_sysval_input(ctx, SYSTEM_VALUE_REL_PATCH_ID_IR3, 0x1);
break;
case MESA_SHADER_TESS_EVAL:
- if (has_gs)
+ if (has_gs) {
ctx->gs_header =
create_sysval_input(ctx, SYSTEM_VALUE_GS_HEADER_IR3, 0x1);
- ctx->primitive_id =
- create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+ ctx->primitive_id =
+ create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+ }
+ ctx->rel_patch_id =
+ create_sysval_input(ctx, SYSTEM_VALUE_REL_PATCH_ID_IR3, 0x1);
break;
case MESA_SHADER_GEOMETRY:
ctx->gs_header =
create_sysval_input(ctx, SYSTEM_VALUE_GS_HEADER_IR3, 0x1);
- ctx->primitive_id =
- create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
break;
default:
break;
struct ir3_instruction *out = ir3_collect(ctx, ctx->primitive_id);
outputs[outputs_count] = out;
outidxs[outputs_count] = n;
+ if (so->type == MESA_SHADER_VERTEX && ctx->rel_patch_id)
+ regids[outputs_count] = regid(0, 2);
+ else
+ regids[outputs_count] = regid(0, 1);
+ outputs_count++;
+ }
+
+ if (so->type == MESA_SHADER_VERTEX && ctx->rel_patch_id) {
+ unsigned n = so->outputs_count++;
+ so->outputs[n].slot = VARYING_SLOT_REL_PATCH_ID_IR3;
+ struct ir3_instruction *out = ir3_collect(ctx, ctx->rel_patch_id);
+ outputs[outputs_count] = out;
+ outidxs[outputs_count] = n;
regids[outputs_count] = regid(0, 1);
outputs_count++;
}
*/
ctx->tcs_header->dsts[0]->num = regid(0, 0);
- ctx->primitive_id->dsts[0]->num = regid(0, 1);
+ ctx->rel_patch_id->dsts[0]->num = regid(0, 1);
+ if (ctx->primitive_id)
+ ctx->primitive_id->dsts[0]->num = regid(0, 2);
} else if (ctx->gs_header) {
/* We need to have these values in the same registers between producer
* (VS or DS) and GS since the producer chains to GS and doesn't get
*/
ctx->gs_header->dsts[0]->num = regid(0, 0);
- ctx->primitive_id->dsts[0]->num = regid(0, 1);
+ if (ctx->primitive_id)
+ ctx->primitive_id->dsts[0]->num = regid(0, 1);
} else if (so->num_sampler_prefetch) {
assert(so->type == MESA_SHADER_FRAGMENT);
int idx = 0;
struct ir3_instruction *patch_vertices_in;
struct ir3_instruction *tcs_header;
struct ir3_instruction *tess_coord;
+ struct ir3_instruction *rel_patch_id;
/* Compute shader inputs: */
struct ir3_instruction *local_invocation_id, *work_group_id;
/* Returns true when `slot` is one of the three slots compared below: the
 * primitive ID, or the outer/inner tessellation levels.
 *
 * Despite the name, VARYING_SLOT_PRIMITIVE_ID is matched as well. The
 * visible caller uses this predicate to pick stores that are addressed
 * relative to the tess factor base instead of the tess param base.
 */
static bool
is_tess_levels(gl_varying_slot slot)
{
- return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
+ return (slot == VARYING_SLOT_PRIMITIVE_ID ||
+ slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
slot == VARYING_SLOT_TESS_LEVEL_INNER);
}
nir_ssa_def *vertex, uint32_t location, uint32_t comp,
nir_ssa_def *offset)
{
- nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+ nir_ssa_def *patch_id = nir_load_rel_patch_id_ir3(b);
nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
- nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
+ nir_ssa_def *patch_offset = nir_imul24(b, patch_id, patch_stride);
nir_ssa_def *attr_offset;
if (nir_src_is_const(nir_src_for_ssa(offset))) {
const uint32_t patch_stride = 1 + inner_levels + outer_levels;
- nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+ nir_ssa_def *patch_id = nir_load_rel_patch_id_ir3(b);
nir_ssa_def *patch_offset =
- nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));
+ nir_imul24(b, patch_id, nir_imm_int(b, patch_stride));
uint32_t offset;
switch (slot) {
+ case VARYING_SLOT_PRIMITIVE_ID:
+ offset = 0;
+ break;
case VARYING_SLOT_TESS_LEVEL_OUTER:
- /* There's some kind of header dword, tess levels start at index 1. */
offset = 1;
break;
case VARYING_SLOT_TESS_LEVEL_INNER:
gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
if (is_tess_levels(location)) {
- /* with tess levels are defined as float[4] and float[2],
- * but tess factor BO has smaller sizes for tris/isolines,
- * so we have to discard any writes beyond the number of
- * components for inner/outer levels */
uint32_t inner_levels, outer_levels, levels;
tess_level_components(state, &inner_levels, &outer_levels);
- if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
- levels = outer_levels;
- else
- levels = inner_levels;
-
assert(intr->src[0].ssa->num_components == 1);
nir_ssa_def *offset =
nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));
- nir_if *nif =
- nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));
+ nir_if *nif = NULL;
+ if (location != VARYING_SLOT_PRIMITIVE_ID) {
+ /* Tess levels are defined as float[4] and float[2],
+ * but the tess factor BO has smaller sizes for tris/isolines,
+ * so we have to discard any writes beyond the number of
+ * components for inner/outer levels.
+ */
+ if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ levels = outer_levels;
+ else
+ levels = inner_levels;
+
+ nif = nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));
+ }
replace_intrinsic(
b, intr, nir_intrinsic_store_global_ir3, intr->src[0].ssa,
nir_load_tess_factor_base_ir3(b),
nir_iadd(b, offset, build_tessfactor_base(b, location, state)));
- nir_pop_if(b, nif);
+ if (location != VARYING_SLOT_PRIMITIVE_ID) {
+ nir_pop_if(b, nif);
+ }
} else {
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
nir_ssa_def *offset = build_patch_offset(
state.header = nir_load_tcs_header_ir3(&b);
+ /* If required, store gl_PrimitiveID. */
+ if (v->key.tcs_store_primid) {
+ b.cursor = nir_after_cf_list(&impl->body);
+
+ nir_store_output(&b, nir_load_primitive_id(&b), nir_imm_int(&b, 0),
+ .io_semantics = {
+ .location = VARYING_SLOT_PRIMITIVE_ID,
+ .num_slots = 1
+ });
+
+ b.cursor = nir_before_cf_list(&impl->body);
+ }
+
nir_foreach_block_safe (block, impl)
lower_tess_ctrl_block(block, &b, &state);
key->vastc_srgb = ~0;
key->vsamples = ~0;
}
+
+ if (info->stage == MESA_SHADER_TESS_CTRL)
+ key->tcs_store_primid = true;
}
}
unsigned has_gs : 1;
+ /* Whether stages after TCS read gl_PrimitiveID, used to determine
+ * whether the TCS has to store it in the tess factor BO.
+ */
+ unsigned tcs_store_primid : 1;
+
/* Whether this variant sticks to the "safe" maximum constlen,
* which guarantees that the combined stages will never go over
* the limit:
#define VARYING_SLOT_GS_HEADER_IR3 (VARYING_SLOT_MAX + 0)
#define VARYING_SLOT_GS_VERTEX_FLAGS_IR3 (VARYING_SLOT_MAX + 1)
#define VARYING_SLOT_TCS_HEADER_IR3 (VARYING_SLOT_MAX + 2)
+#define VARYING_SLOT_REL_PATCH_ID_IR3 (VARYING_SLOT_MAX + 3)
static inline uint32_t
ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
tess_coord_x_regid + 1 :
regid(63, 0);
const uint32_t hs_rel_patch_regid = hs ?
- ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID) :
+ ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
regid(63, 0);
const uint32_t ds_rel_patch_regid = hs ?
- ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID) :
+ ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
regid(63, 0);
const uint32_t hs_invocation_regid = hs ?
ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3) :
regid(63, 0);
- const uint32_t primitiveid_regid = gs ?
+ const uint32_t gs_primitiveid_regid = gs ?
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) :
regid(63, 0);
+ const uint32_t hs_primitiveid_regid = hs ?
+ ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID) :
+ regid(63, 0);
+ const uint32_t vs_primitiveid_regid = gs ? gs_primitiveid_regid :
+ hs_primitiveid_regid;
+ const uint32_t ds_primitiveid_regid = ds ?
+ ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID) :
+ regid(63, 0);
const uint32_t gsheader_regid = gs ?
ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3) :
regid(63, 0);
tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6);
tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) |
A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) |
- A6XX_VFD_CONTROL_1_REGID4PRIMID(primitiveid_regid) |
+ A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitiveid_regid) |
A6XX_VFD_CONTROL_1_REGID4VIEWID(viewid_regid));
tu_cs_emit(cs, A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) |
A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
tu_cs_emit(cs, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) |
A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
- 0xfc);
+ A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitiveid_regid));
tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */
tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) |
0xfc00); /* VFD_CONTROL_5 */
key.tessellation = tu6_get_tessmode(shader);
}
+ if (stage > MESA_SHADER_TESS_CTRL) {
+ if (stage == MESA_SHADER_FRAGMENT) {
+ key.tcs_store_primid = key.tcs_store_primid ||
+ (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
+ } else {
+ key.tcs_store_primid = key.tcs_store_primid ||
+ BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
+ }
+ }
+
/* Keep track of the status of each shader's active descriptor sets,
* which is set in tu_lower_io. */
desc_sets |= shader->active_desc_sets;
struct shader_info *ds_info = ir3_get_shader_info(emit.key.ds);
emit.key.key.tessellation = ir3_tess_mode(ds_info->tess.primitive_mode);
ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
+
+ struct shader_info *fs_info = ir3_get_shader_info(emit.key.fs);
+ emit.key.key.tcs_store_primid =
+ BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
+ (gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) ||
+ (fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)));
}
if (emit.key.gs) {
uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
uint32_t smask_in_regid, smask_regid;
uint32_t stencilref_regid;
- uint32_t vertex_regid, instance_regid, layer_regid, primitive_regid;
+ uint32_t vertex_regid, instance_regid, layer_regid, vs_primitive_regid;
uint32_t hs_invocation_regid;
uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_rel_patch_regid,
- ds_rel_patch_regid;
+ ds_rel_patch_regid, ds_primitive_regid;
uint32_t ij_regid[IJ_COUNT];
uint32_t gs_header_regid;
enum a6xx_threadsize fssz;
layer_regid = ir3_find_output_regid(vs, VARYING_SLOT_LAYER);
vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
+ if (gs)
+ vs_primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
+ else if (hs)
+ vs_primitive_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID);
+ else
+ vs_primitive_regid = regid(63, 0);
if (hs) {
tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
- hs_rel_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID);
- ds_rel_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
+ hs_rel_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3);
+ ds_rel_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3);
+ ds_primitive_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
hs_invocation_regid =
ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3);
tess_coord_y_regid = regid(63, 0);
hs_rel_patch_regid = regid(63, 0);
ds_rel_patch_regid = regid(63, 0);
+ ds_primitive_regid = regid(63, 0);
hs_invocation_regid = regid(63, 0);
}
+ bool gs_reads_primid = false;
if (gs) {
gs_header_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3);
- primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
+ gs_reads_primid = VALIDREG(ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID));
pos_regid = ir3_find_output_regid(gs, VARYING_SLOT_POS);
psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ);
clip0_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST0);
layer_regid = ir3_find_output_regid(gs, VARYING_SLOT_LAYER);
} else {
gs_header_regid = regid(63, 0);
- primitive_regid = regid(63, 0);
}
if (fs->color0_mrt) {
A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
CONDREG(psize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) |
CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) |
- CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) |
+ COND(gs_reads_primid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) |
A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
uint32_t output;
OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6);
OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
- A6XX_VFD_CONTROL_1_REGID4PRIMID(primitive_regid) |
+ A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitive_regid) |
0xfc000000);
OUT_RING(ring,
A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) |
A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) |
A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
- A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) | 0xfc);
+ A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
+ A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitive_regid));
OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
OUT_RING(ring, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gs_header_regid) |
0xfc00); /* VFD_CONTROL_5 */
spec@arb_shader_image_load_store@indexing,Crash
spec@arb_shader_storage_buffer_object@array-ssbo-auto-binding,Fail
spec@arb_shader_storage_buffer_object@linker@instance-matching-shader-storage-blocks-member-array-size-mismatch,Fail
-spec@arb_tessellation_shader@execution@fs-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@invocation-counting-even,Fail
spec@arb_tessellation_shader@execution@invocation-counting-odd,Fail