zink: plug in the program/module parts of shadow texture mode emulation

author Mike Blumenkrantz <michael.blumenkrantz@gmail.com>

Thu, 5 Jan 2023 21:43:08 +0000 (16:43 -0500)

committer Marge Bot <emma+marge@anholt.net>

Tue, 24 Jan 2023 08:30:09 +0000 (08:30 +0000)
author Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Thu, 5 Jan 2023 21:43:08 +0000 (16:43 -0500)
committer Marge Bot <emma+marge@anholt.net>
Tue, 24 Jan 2023 08:30:09 +0000 (08:30 +0000)
diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c

index 7e211ac..8a688db 100644 (file)
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@@ -131,7 +131,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
     const struct zink_shader_key *key = &state->shader_keys.key[stage];
     /* non-generated tcs won't use the shader key */
     const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
-   zm = malloc(sizeof(struct zink_shader_module) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t));
+   const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle;
+   zm = malloc(sizeof(struct zink_shader_module) + key->size +
+               (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
+               (shadow_needs_shader_swizzle ? sizeof(struct zink_fs_shadow_key) : 0));
     if (!zm) {
        return NULL;
     }
@@ -166,6 +169,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
        zm->hash = patch_vertices;
     else
        zm->hash = shader_module_hash(zm);
+   if (unlikely(shadow_needs_shader_swizzle)) {
+      memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
+      zm->hash ^= _mesa_hash_data(&ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
+   }
     zm->default_variant = !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
     if (inline_size)
        prog->inlined_variant_count[stage]++;
@@ -185,6 +192,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen
     const struct zink_shader_key *key = &state->shader_keys.key[stage];
     /* non-generated tcs won't use the shader key */
     const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+   const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle);
  
     struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
     unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
@@ -199,6 +207,12 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen
              continue;
           if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
              continue;
+         if (unlikely(shadow_needs_shader_swizzle)) {
+            /* shadow swizzle data needs a manual compare since it's so fat */
+            if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
+                       &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
+               continue;
+         }
        }
        if (i > 0) {
           struct zink_shader_module *zero = pzm[0];
@@ -221,17 +235,19 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
     struct zink_shader_module *zm;
     uint16_t *key;
     unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
+   bool shadow_needs_shader_swizzle = false;
     if (zs == prog->last_vertex_stage) {
        key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
     } else if (stage == MESA_SHADER_FRAGMENT) {
        key = (uint16_t*)&state->shader_keys_optimal.key.fs;
+      shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
     } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
        key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
     } else {
        key = NULL;
     }
     size_t key_size = sizeof(uint16_t);
-   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0));
+   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_fs_shadow_key) : 0));
     if (!zm) {
        return NULL;
     }
@@ -254,6 +270,8 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
        uint16_t *data = (uint16_t*)zm->key;
        /* sanitize actual key bits */
        *data = (*key) & mask;
+      if (unlikely(shadow_needs_shader_swizzle))
+         memcpy(&data[1], &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
     }
     zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
     util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
@@ -268,12 +286,14 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen
  {
     /* non-generated tcs won't use the shader key */
     const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
+   bool shadow_needs_shader_swizzle = false;
     uint16_t *key;
     unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
     if (zs == prog->last_vertex_stage) {
        key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
     } else if (stage == MESA_SHADER_FRAGMENT) {
        key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
+      shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
     } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
        key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
     } else {
@@ -291,6 +311,11 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen
           /* no key is bigger than uint16_t */
           if (memcmp(iter->key, &val, sizeof(uint16_t)))
              continue;
+         if (unlikely(shadow_needs_shader_swizzle)) {
+            /* shadow swizzle data needs a manual compare since it's so fat */
+            if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
+               continue;
+         }
        }
        if (i > 0) {
           struct zink_shader_module *zero = pzm[0];
@@ -603,9 +628,16 @@ update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *pr
        bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage);
        ctx->gfx_pipeline_state.modules_changed |= changed;
     }
-   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits) {
+   const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
+   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
+       /* always recheck shadow swizzles since they aren't directly part of the key */
+       unlikely(shadow_needs_shader_swizzle)) {
        bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
        ctx->gfx_pipeline_state.modules_changed |= changed;
+      if (unlikely(shadow_needs_shader_swizzle)) {
+         struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
+         ctx->gfx_pipeline_state.shadow = (struct zink_fs_shadow_key*)pzm[0]->key + sizeof(uint16_t);
+      }
     }
     if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
         ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp

index 3c8322e..0e78e9e 100644 (file)
--- a/src/gallium/drivers/zink/zink_program_state.hpp
+++ b/src/gallium/drivers/zink/zink_program_state.hpp
@@ -362,6 +362,10 @@ equals_gfx_pipeline_state(const void *a, const void *b)
     if (STAGE_MASK & STAGE_MASK_OPTIMAL) {
        if (sa->optimal_key != sb->optimal_key)
           return false;
+      if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) {
+         if (sa->shadow != sb->shadow)
+            return false;
+      }
     } else {
        if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
           if (sa->modules[MESA_SHADER_TESS_CTRL] != sb->modules[MESA_SHADER_TESS_CTRL])
@@ -387,10 +391,13 @@ equals_gfx_pipeline_state(const void *a, const void *b)
  /* below is a bunch of code to pick the right equals_gfx_pipeline_state template for runtime */
  template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK>
  static equals_gfx_pipeline_state_func
-get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys)
+get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys, bool shadow_needs_shader_swizzle)
  {
-   if (optimal_keys)
+   if (optimal_keys) {
+      if (shadow_needs_shader_swizzle)
+         return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL | STAGE_MASK_OPTIMAL_SHADOW>;
        return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL>;
+   }
     return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK>;
  }
  
@@ -398,6 +405,7 @@ template <zink_pipeline_dynamic_state DYNAMIC_STATE>
  static equals_gfx_pipeline_state_func
  get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
  {
+   bool shadow_needs_shader_swizzle = prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask > 0;
     unsigned vertex_stages = prog->stages_present & BITFIELD_MASK(MESA_SHADER_FRAGMENT);
     if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
        if (prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
@@ -407,30 +415,30 @@ get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
        if (vertex_stages == BITFIELD_MASK(MESA_SHADER_FRAGMENT))
           /* all stages */
           return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys);
+                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys, shadow_needs_shader_swizzle);
        if (vertex_stages == BITFIELD_MASK(MESA_SHADER_GEOMETRY))
           /* tess only: includes generated tcs too */
           return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+                                                       BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
        if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
           /* geom only */
           return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                       BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+                                                       BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
     }
     if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_FRAGMENT) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
        /* all stages but tcs */
        return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys);
+                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys, shadow_needs_shader_swizzle);
     if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_GEOMETRY) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
        /* tess only: generated tcs */
        return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys);
+                                                    BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys, shadow_needs_shader_swizzle);
     if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
        /* geom only */
        return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                    BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
+                                                    BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
     return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
-                                                  BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys);
+                                                 BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys, shadow_needs_shader_swizzle);
  }
  
  equals_gfx_pipeline_state_func
diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h

index 2b493f8..616983b 100644 (file)
--- a/src/gallium/drivers/zink/zink_types.h
+++ b/src/gallium/drivers/zink/zink_types.h
@@ -797,6 +797,7 @@ struct zink_gfx_pipeline_state {
     uint32_t vertex_buffers_enabled_mask;
     uint32_t vertex_strides[PIPE_MAX_ATTRIBS];
     struct zink_vertex_elements_hw_state *element_state;
+   struct zink_fs_shadow_key *shadow;
     bool sample_locations_enabled;
     enum pipe_prim_type shader_rast_prim, rast_prim; /* reduced type or max for unknown */
     union {
@@ -875,7 +876,7 @@ struct zink_shader_module {
     bool has_nonseamless;
     uint8_t num_uniforms;
     uint8_t key_size;
-   uint8_t key[0]; /* | key | uniforms | */
+   uint8_t key[0]; /* | key | uniforms | shadow swizzle | */
  };
  
  struct zink_program {
@@ -900,6 +901,7 @@ struct zink_program {
  };
  
  #define STAGE_MASK_OPTIMAL (1<<16)
+#define STAGE_MASK_OPTIMAL_SHADOW (1<<17)
  typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const void *b);
  
  struct zink_gfx_library_key {
author	Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
	Thu, 5 Jan 2023 21:43:08 +0000 (16:43 -0500)
committer	Marge Bot <emma+marge@anholt.net>
	Tue, 24 Jan 2023 08:30:09 +0000 (08:30 +0000)
src/gallium/drivers/zink/zink_program.c		patch \| blob \| history
src/gallium/drivers/zink/zink_program_state.hpp		patch \| blob \| history
src/gallium/drivers/zink/zink_types.h		patch \| blob \| history