const struct zink_shader_key *key = &state->shader_keys.key[stage];
/* non-generated tcs won't use the shader key */
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
- zm = malloc(sizeof(struct zink_shader_module) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t));
+ const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle;
+ zm = malloc(sizeof(struct zink_shader_module) + key->size +
+ (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
+ (shadow_needs_shader_swizzle ? sizeof(struct zink_fs_shadow_key) : 0));
if (!zm) {
return NULL;
}
zm->hash = patch_vertices;
else
zm->hash = shader_module_hash(zm);
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
+ zm->hash ^= _mesa_hash_data(&ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
+ }
zm->default_variant = !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
if (inline_size)
prog->inlined_variant_count[stage]++;
const struct zink_shader_key *key = &state->shader_keys.key[stage];
/* non-generated tcs won't use the shader key */
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+ const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle);
struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
continue;
if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
continue;
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ /* shadow swizzle data needs a manual compare since it's so fat */
+ if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
+ &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
+ continue;
+ }
}
if (i > 0) {
struct zink_shader_module *zero = pzm[0];
struct zink_shader_module *zm;
uint16_t *key;
unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
+ bool shadow_needs_shader_swizzle = false;
if (zs == prog->last_vertex_stage) {
key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
} else if (stage == MESA_SHADER_FRAGMENT) {
key = (uint16_t*)&state->shader_keys_optimal.key.fs;
+ shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
} else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
} else {
key = NULL;
}
size_t key_size = sizeof(uint16_t);
- zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0));
+ zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_fs_shadow_key) : 0));
if (!zm) {
return NULL;
}
uint16_t *data = (uint16_t*)zm->key;
/* sanitize actual key bits */
*data = (*key) & mask;
+ if (unlikely(shadow_needs_shader_swizzle))
+ memcpy(&data[1], &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
}
zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
{
/* non-generated tcs won't use the shader key */
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+ bool shadow_needs_shader_swizzle = false;
uint16_t *key;
unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
if (zs == prog->last_vertex_stage) {
key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
} else if (stage == MESA_SHADER_FRAGMENT) {
key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
+ shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
} else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
} else {
/* no key is bigger than uint16_t */
if (memcmp(iter->key, &val, sizeof(uint16_t)))
continue;
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ /* shadow swizzle data needs a manual compare since it's so fat */
+ if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
+ continue;
+ }
}
if (i > 0) {
struct zink_shader_module *zero = pzm[0];
bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage);
ctx->gfx_pipeline_state.modules_changed |= changed;
}
- if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits) {
+ const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
+ if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
+ /* always recheck shadow swizzles since they aren't directly part of the key */
+ unlikely(shadow_needs_shader_swizzle)) {
bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
ctx->gfx_pipeline_state.modules_changed |= changed;
+ if (unlikely(shadow_needs_shader_swizzle)) {
+ struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
+ /* the shadow swizzle data is stored in the module key directly after the uint16_t
+  * optimal key, i.e. at a BYTE offset of sizeof(uint16_t) (it is written at &data[1] of
+  * a uint16_t pointer and compared at key + sizeof(uint16_t) during lookup).
+  * The offset must be applied before the cast: casting first would scale it by
+  * sizeof(struct zink_fs_shadow_key) and yield a pointer outside the key storage.
+  * NOTE(review): key + 2 may not satisfy the struct's alignment; this looks safe only
+  * as long as the pointer is compared and never dereferenced directly - confirm.
+  */
+ ctx->gfx_pipeline_state.shadow = (struct zink_fs_shadow_key*)(pzm[0]->key + sizeof(uint16_t));
+ }
}
if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
if (STAGE_MASK & STAGE_MASK_OPTIMAL) {
if (sa->optimal_key != sb->optimal_key)
return false;
+ if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) {
+ if (sa->shadow != sb->shadow)
+ return false;
+ }
} else {
if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
if (sa->modules[MESA_SHADER_TESS_CTRL] != sb->modules[MESA_SHADER_TESS_CTRL])
/* below is a bunch of code to pick the right equals_gfx_pipeline_state template for runtime */
template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK>
static equals_gfx_pipeline_state_func
-get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys)
+get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys, bool shadow_needs_shader_swizzle)
{
- if (optimal_keys)
+ if (optimal_keys) {
+ if (shadow_needs_shader_swizzle)
+ return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL | STAGE_MASK_OPTIMAL_SHADOW>;
return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL>;
+ }
return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK>;
}
static equals_gfx_pipeline_state_func
get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
{
+ bool shadow_needs_shader_swizzle = prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask > 0;
unsigned vertex_stages = prog->stages_present & BITFIELD_MASK(MESA_SHADER_FRAGMENT);
if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
if (prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
if (vertex_stages == BITFIELD_MASK(MESA_SHADER_FRAGMENT))
/* all stages */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
- BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys);
+ BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == BITFIELD_MASK(MESA_SHADER_GEOMETRY))
/* tess only: includes generated tcs too */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
- BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+ BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
/* geom only */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
- BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+ BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
}
if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_FRAGMENT) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
/* all stages but tcs */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
- BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys);
+ BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_GEOMETRY) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
/* tess only: generated tcs */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
- BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys);
+ BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
/* geom only */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
- BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+ BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
- BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys);
+ BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys, shadow_needs_shader_swizzle);
}
equals_gfx_pipeline_state_func