zink: plug in the program/module parts of shadow texture mode emulation
authorMike Blumenkrantz <michael.blumenkrantz@gmail.com>
Thu, 5 Jan 2023 21:43:08 +0000 (16:43 -0500)
committerMarge Bot <emma+marge@anholt.net>
Tue, 24 Jan 2023 08:30:09 +0000 (08:30 +0000)
this is clunky because of how big the swizzle data block is,
but the gist of it is the data block is stored onto the shader module key
after all the other data, and then it gets manually hashed/compared in
relevant cases

it's gross, but so is this functionality

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20598>

src/gallium/drivers/zink/zink_program.c
src/gallium/drivers/zink/zink_program_state.hpp
src/gallium/drivers/zink/zink_types.h

index 7e211ac..8a688db 100644 (file)
@@ -131,7 +131,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
    const struct zink_shader_key *key = &state->shader_keys.key[stage];
    /* non-generated tcs won't use the shader key */
    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
-   zm = malloc(sizeof(struct zink_shader_module) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t));
+   const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle;
+   zm = malloc(sizeof(struct zink_shader_module) + key->size +
+               (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
+               (shadow_needs_shader_swizzle ? sizeof(struct zink_fs_shadow_key) : 0));
    if (!zm) {
       return NULL;
    }
@@ -166,6 +169,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
       zm->hash = patch_vertices;
    else
       zm->hash = shader_module_hash(zm);
+   if (unlikely(shadow_needs_shader_swizzle)) {
+      memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
+      zm->hash ^= _mesa_hash_data(&ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
+   }
    zm->default_variant = !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
    if (inline_size)
       prog->inlined_variant_count[stage]++;
@@ -185,6 +192,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen
    const struct zink_shader_key *key = &state->shader_keys.key[stage];
    /* non-generated tcs won't use the shader key */
    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+   const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle);
 
    struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
    unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
@@ -199,6 +207,12 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen
             continue;
          if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
             continue;
+         if (unlikely(shadow_needs_shader_swizzle)) {
+            /* shadow swizzle data needs a manual compare since it's so fat */
+            if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
+                       &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
+               continue;
+         }
       }
       if (i > 0) {
          struct zink_shader_module *zero = pzm[0];
@@ -221,17 +235,19 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
    struct zink_shader_module *zm;
    uint16_t *key;
    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
+   bool shadow_needs_shader_swizzle = false;
    if (zs == prog->last_vertex_stage) {
       key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
    } else if (stage == MESA_SHADER_FRAGMENT) {
       key = (uint16_t*)&state->shader_keys_optimal.key.fs;
+      shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
    } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
       key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
    } else {
       key = NULL;
    }
    size_t key_size = sizeof(uint16_t);
-   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0));
+   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_fs_shadow_key) : 0));
    if (!zm) {
       return NULL;
    }
@@ -254,6 +270,8 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
       uint16_t *data = (uint16_t*)zm->key;
       /* sanitize actual key bits */
       *data = (*key) & mask;
+      if (unlikely(shadow_needs_shader_swizzle))
+         memcpy(&data[1], &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
    }
    zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
    util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
@@ -268,12 +286,14 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen
 {
    /* non-generated tcs won't use the shader key */
    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+   bool shadow_needs_shader_swizzle = false;
    uint16_t *key;
    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
    if (zs == prog->last_vertex_stage) {
       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
    } else if (stage == MESA_SHADER_FRAGMENT) {
       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
+      shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
    } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
    } else {
@@ -291,6 +311,11 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen
          /* no key is bigger than uint16_t */
          if (memcmp(iter->key, &val, sizeof(uint16_t)))
             continue;
+         if (unlikely(shadow_needs_shader_swizzle)) {
+            /* shadow swizzle data needs a manual compare since it's so fat */
+            if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
+               continue;
+         }
       }
       if (i > 0) {
          struct zink_shader_module *zero = pzm[0];
@@ -603,9 +628,16 @@ update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *pr
       bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage);
       ctx->gfx_pipeline_state.modules_changed |= changed;
    }
-   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits) {
+   const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
+   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
+       /* always recheck shadow swizzles since they aren't directly part of the key */
+       unlikely(shadow_needs_shader_swizzle)) {
       bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
       ctx->gfx_pipeline_state.modules_changed |= changed;
+      if (unlikely(shadow_needs_shader_swizzle)) {
+         struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
+         ctx->gfx_pipeline_state.shadow = (struct zink_fs_shadow_key*)pzm[0]->key + sizeof(uint16_t);
+      }
    }
    if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
        ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
index 3c8322e..0e78e9e 100644 (file)
@@ -362,6 +362,10 @@ equals_gfx_pipeline_state(const void *a, const void *b)
    if (STAGE_MASK & STAGE_MASK_OPTIMAL) {
       if (sa->optimal_key != sb->optimal_key)
          return false;
+      if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) {
+         if (sa->shadow != sb->shadow)
+            return false;
+      }
    } else {
       if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
          if (sa->modules[MESA_SHADER_TESS_CTRL] != sb->modules[MESA_SHADER_TESS_CTRL])
@@ -387,10 +391,13 @@ equals_gfx_pipeline_state(const void *a, const void *b)
 /* below is a bunch of code to pick the right equals_gfx_pipeline_state template for runtime */
 template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK>
 static equals_gfx_pipeline_state_func
-get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys)
+get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys, bool shadow_needs_shader_swizzle)
 {
-   if (optimal_keys)
+   if (optimal_keys) {
+      if (shadow_needs_shader_swizzle)
+         return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL | STAGE_MASK_OPTIMAL_SHADOW>;
       return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL>;
+   }
    return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK>;
 }
 
@@ -398,6 +405,7 @@ template <zink_pipeline_dynamic_state DYNAMIC_STATE>
 static equals_gfx_pipeline_state_func
 get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
 {
+   bool shadow_needs_shader_swizzle = prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask > 0;
    unsigned vertex_stages = prog->stages_present & BITFIELD_MASK(MESA_SHADER_FRAGMENT);
    if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
       if (prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
@@ -407,30 +415,30 @@ get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
       if (vertex_stages == BITFIELD_MASK(MESA_SHADER_FRAGMENT))
          /* all stages */
          return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys);
+                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys, shadow_needs_shader_swizzle);
       if (vertex_stages == BITFIELD_MASK(MESA_SHADER_GEOMETRY))
          /* tess only: includes generated tcs too */
          return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
       if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
          /* geom only */
          return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                       BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+                                                       BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
    }
    if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_FRAGMENT) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
       /* all stages but tcs */
       return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys);
+                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys, shadow_needs_shader_swizzle);
    if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_GEOMETRY) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
       /* tess only: generated tcs */
       return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys);
+                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys, shadow_needs_shader_swizzle);
    if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
       /* geom only */
       return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                    BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+                                                    BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
    return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                  BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys);
+                                                 BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys, shadow_needs_shader_swizzle);
 }
 
 equals_gfx_pipeline_state_func
index 2b493f8..616983b 100644 (file)
@@ -797,6 +797,7 @@ struct zink_gfx_pipeline_state {
    uint32_t vertex_buffers_enabled_mask;
    uint32_t vertex_strides[PIPE_MAX_ATTRIBS];
    struct zink_vertex_elements_hw_state *element_state;
+   struct zink_fs_shadow_key *shadow;
    bool sample_locations_enabled;
    enum pipe_prim_type shader_rast_prim, rast_prim; /* reduced type or max for unknown */
    union {
@@ -875,7 +876,7 @@ struct zink_shader_module {
    bool has_nonseamless;
    uint8_t num_uniforms;
    uint8_t key_size;
-   uint8_t key[0]; /* | key | uniforms | */
+   uint8_t key[0]; /* | key | uniforms | shadow swizzle | */
 };
 
 struct zink_program {
@@ -900,6 +901,7 @@ struct zink_program {
 };
 
 #define STAGE_MASK_OPTIMAL (1<<16)
+#define STAGE_MASK_OPTIMAL_SHADOW (1<<17)
 typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const void *b);
 
 struct zink_gfx_library_key {