src/gallium/drivers/zink/zink_program.c

   1 /*
   2  * Copyright 2018 Collabora Ltd.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "zink_program.h"
  25
  26 #include "zink_compiler.h"
  27 #include "zink_context.h"
  28 #include "zink_descriptors.h"
  29 #include "zink_helpers.h"
  30 #include "zink_pipeline.h"
  31 #include "zink_render_pass.h"
  32 #include "zink_resource.h"
  33 #include "zink_screen.h"
  34 #include "zink_state.h"
  35 #include "zink_inlines.h"
  36
  37 #include "util/u_debug.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_prim.h"
  40 #include "nir_serialize.h"
  41 #include "nir/nir_draw_helpers.h"
  42
  43 /* for pipeline cache */
  44 #define XXH_INLINE_ALL
  45 #include "util/xxhash.h"
  46
  47 static void
  48 precompile_job(void *data, void *gdata, int thread_index);
  49 struct zink_gfx_program *
  50 create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);
  51
  52 void
  53 debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
  54 {
  55    sprintf(buf, "zink_gfx_program");
  56 }
  57
  58 void
  59 debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
  60 {
  61    sprintf(buf, "zink_compute_program");
  62 }
  63
  64 ALWAYS_INLINE static bool
  65 shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
  66 {
  67    if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
  68        zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
  69       return false;
  70    const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
  71    return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
  72           (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
  73                                     key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
  74 }
  75
  76 ALWAYS_INLINE static bool
  77 shader_key_matches(const struct zink_shader_module *zm,
  78                    const struct zink_shader_key *key, unsigned num_uniforms,
  79                    bool has_inline, bool has_nonseamless)
  80 {
  81    const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
  82    if (has_inline) {
  83       if (zm->num_uniforms != num_uniforms ||
  84           (num_uniforms &&
  85            memcmp(zm->key + zm->key_size + nonseamless_size,
  86                   key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
  87          return false;
  88    }
  89    if (!has_nonseamless) {
  90       if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
  91           (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
  92          return false;
  93    }
  94    if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
  95       return false;
  96    return !memcmp(zm->key, key, zm->key_size);
  97 }
  98
  99 static uint32_t
 100 shader_module_hash(const struct zink_shader_module *zm)
 101 {
 102    const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
 103    unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
 104    return _mesa_hash_data(zm->key, key_size);
 105 }
 106
 107 ALWAYS_INLINE static void
 108 gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
 109                           struct zink_shader *zs, struct zink_gfx_program *prog,
 110                           struct zink_gfx_pipeline_state *state,
 111                           bool has_inline, //is inlining enabled?
 112                           bool has_nonseamless, //is nonseamless ext present?
 113                           unsigned *inline_size, unsigned *nonseamless_size)
 114 {
 115    gl_shader_stage stage = zs->info.stage;
 116    struct zink_shader_key *key = &state->shader_keys.key[stage];
 117    if (has_inline && ctx && zs->info.num_inlinable_uniforms &&
 118        ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
 119       if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
 120          *inline_size = zs->info.num_inlinable_uniforms;
 121       else
 122          key->inline_uniforms = false;
 123    }
 124    if (!has_nonseamless && key->base.nonseamless_cube_mask)
 125       *nonseamless_size = sizeof(uint32_t);
 126 }
 127
 128 ALWAYS_INLINE static struct zink_shader_module *
 129 create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
 130                                struct zink_shader *zs, struct zink_gfx_program *prog,
 131                                gl_shader_stage stage,
 132                                struct zink_gfx_pipeline_state *state,
 133                                unsigned inline_size, unsigned nonseamless_size,
 134                                bool has_inline, //is inlining enabled?
 135                                bool has_nonseamless) //is nonseamless ext present?
 136 {
 137    VkShaderModule mod;
 138    struct zink_shader_module *zm;
 139    const struct zink_shader_key *key = &state->shader_keys.key[stage];
 140    /* non-generated tcs won't use the shader key */
 141    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 142    const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle ||
 143                                             (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle);
 144    zm = malloc(sizeof(struct zink_shader_module) + key->size +
 145                (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
 146                (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0));
 147    if (!zm) {
 148       return NULL;
 149    }
 150    unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
 151    if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
 152       assert(ctx); //TODO async
 153       struct zink_shader_object obj = zink_shader_tcs_compile(screen, zs, patch_vertices);
 154       mod = obj.mod;
 155    } else {
 156       mod = zink_shader_compile(screen, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage]);
 157    }
 158    if (!mod) {
 159       FREE(zm);
 160       return NULL;
 161    }
 162    zm->shader = mod;
 163    zm->num_uniforms = inline_size;
 164    if (!is_nongenerated_tcs) {
 165       zm->key_size = key->size;
 166       memcpy(zm->key, key, key->size);
 167    } else {
 168       zm->key_size = 0;
 169       memset(zm->key, 0, key->size);
 170    }
 171    if (!has_nonseamless && nonseamless_size) {
 172       /* nonseamless mask gets added to base key if it exists */
 173       memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
 174    }
 175    zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle;
 176    zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
 177    if (inline_size)
 178       memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
 179    if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
 180       zm->hash = patch_vertices;
 181    else
 182       zm->hash = shader_module_hash(zm);
 183    if (unlikely(shadow_needs_shader_swizzle)) {
 184       memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
 185       zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
 186    }
 187    zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
 188    if (inline_size)
 189       prog->inlined_variant_count[stage]++;
 190    util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
 191    return zm;
 192 }
 193
 194 ALWAYS_INLINE static struct zink_shader_module *
 195 get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
 196                             struct zink_shader *zs, struct zink_gfx_program *prog,
 197                             gl_shader_stage stage,
 198                             struct zink_gfx_pipeline_state *state,
 199                             unsigned inline_size, unsigned nonseamless_size,
 200                             bool has_inline, //is inlining enabled?
 201                             bool has_nonseamless) //is nonseamless ext present?
 202 {
 203    const struct zink_shader_key *key = &state->shader_keys.key[stage];
 204    /* non-generated tcs won't use the shader key */
 205    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 206    const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) ||
 207                                             (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle));
 208
 209    struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
 210    unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 211    struct zink_shader_module **pzm = shader_cache->data;
 212    for (unsigned i = 0; i < count; i++) {
 213       struct zink_shader_module *iter = pzm[i];
 214       if (is_nongenerated_tcs) {
 215          if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
 216             continue;
 217       } else {
 218          if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
 219             continue;
 220          if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
 221             continue;
 222          if (unlikely(shadow_needs_shader_swizzle)) {
 223             /* shadow swizzle data needs a manual compare since it's so fat */
 224             if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
 225                        &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
 226                continue;
 227          }
 228       }
 229       if (i > 0) {
 230          struct zink_shader_module *zero = pzm[0];
 231          pzm[0] = iter;
 232          pzm[i] = zero;
 233       }
 234       return iter;
 235    }
 236
 237    return NULL;
 238 }
 239
 240 ALWAYS_INLINE static struct zink_shader_module *
 241 create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
 242                                        struct zink_shader *zs, struct zink_gfx_program *prog,
 243                                        gl_shader_stage stage,
 244                                        struct zink_gfx_pipeline_state *state)
 245 {
 246    VkShaderModule mod;
 247    struct zink_shader_module *zm;
 248    uint16_t *key;
 249    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
 250    bool shadow_needs_shader_swizzle = false;
 251    if (zs == prog->last_vertex_stage) {
 252       key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
 253    } else if (stage == MESA_SHADER_FRAGMENT) {
 254       key = (uint16_t*)&state->shader_keys_optimal.key.fs;
 255       shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
 256    } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
 257       key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
 258    } else {
 259       key = NULL;
 260    }
 261    size_t key_size = sizeof(uint16_t);
 262    zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0));
 263    if (!zm) {
 264       return NULL;
 265    }
 266    if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
 267       assert(ctx); //TODO async
 268       struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
 269       struct zink_shader_object obj = zink_shader_tcs_compile(screen, zs, tcs->patch_vertices);
 270       mod = obj.mod;
 271    } else {
 272       mod = zink_shader_compile(screen, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL);
 273    }
 274    if (!mod) {
 275       FREE(zm);
 276       return NULL;
 277    }
 278    zm->shader = mod;
 279    /* non-generated tcs won't use the shader key */
 280    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 281    if (key && !is_nongenerated_tcs) {
 282       zm->key_size = key_size;
 283       uint16_t *data = (uint16_t*)zm->key;
 284       /* sanitize actual key bits */
 285       *data = (*key) & mask;
 286       if (unlikely(shadow_needs_shader_swizzle))
 287          memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
 288    }
 289    zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
 290    util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
 291    return zm;
 292 }
 293
 294 ALWAYS_INLINE static struct zink_shader_module *
 295 get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
 296                                     struct zink_shader *zs, struct zink_gfx_program *prog,
 297                                     gl_shader_stage stage,
 298                                     struct zink_gfx_pipeline_state *state)
 299 {
 300    /* non-generated tcs won't use the shader key */
 301    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 302    bool shadow_needs_shader_swizzle = false;
 303    uint16_t *key;
 304    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
 305    if (zs == prog->last_vertex_stage) {
 306       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
 307    } else if (stage == MESA_SHADER_FRAGMENT) {
 308       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
 309       shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
 310    } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
 311       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
 312    } else {
 313       key = NULL;
 314    }
 315    struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
 316    unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 317    struct zink_shader_module **pzm = shader_cache->data;
 318    for (unsigned i = 0; i < count; i++) {
 319       struct zink_shader_module *iter = pzm[i];
 320       if (is_nongenerated_tcs) {
 321          /* always match */
 322       } else if (key) {
 323          uint16_t val = (*key) & mask;
 324          /* no key is bigger than uint16_t */
 325          if (memcmp(iter->key, &val, sizeof(uint16_t)))
 326             continue;
 327          if (unlikely(shadow_needs_shader_swizzle)) {
 328             /* shadow swizzle data needs a manual compare since it's so fat */
 329             if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
 330                continue;
 331          }
 332       }
 333       if (i > 0) {
 334          struct zink_shader_module *zero = pzm[0];
 335          pzm[0] = iter;
 336          pzm[i] = zero;
 337       }
 338       return iter;
 339    }
 340
 341    return NULL;
 342 }
 343
 344 static void
 345 zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
 346 {
 347    VKSCR(DestroyShaderModule)(screen->dev, zm->shader, NULL);
 348    free(zm);
 349 }
 350
 351 static void
 352 destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
 353 {
 354    while (util_dynarray_contains(sc, void*)) {
 355       struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
 356       zink_destroy_shader_module(screen, zm);
 357    }
 358 }
 359
 360 ALWAYS_INLINE static void
 361 update_gfx_shader_modules(struct zink_context *ctx,
 362                       struct zink_screen *screen,
 363                       struct zink_gfx_program *prog, uint32_t mask,
 364                       struct zink_gfx_pipeline_state *state,
 365                       bool has_inline, //is inlining enabled?
 366                       bool has_nonseamless) //is nonseamless ext present?
 367 {
 368    bool hash_changed = false;
 369    bool default_variants = true;
 370    assert(prog->modules[MESA_SHADER_VERTEX]);
 371    uint32_t variant_hash = prog->last_variant_hash;
 372    prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
 373    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 374       if (!(mask & BITFIELD_BIT(i)))
 375          continue;
 376
 377       assert(prog->shaders[i]);
 378
 379       unsigned inline_size = 0, nonseamless_size = 0;
 380       gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
 381       struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 382                                                                   inline_size, nonseamless_size, has_inline, has_nonseamless);
 383       if (!zm)
 384          zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 385                                              inline_size, nonseamless_size, has_inline, has_nonseamless);
 386       state->modules[i] = zm->shader;
 387       if (prog->modules[i] == zm->shader)
 388          continue;
 389       prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
 390       variant_hash ^= prog->module_hash[i];
 391       hash_changed = true;
 392       default_variants &= zm->default_variant;
 393       prog->modules[i] = zm->shader;
 394       prog->module_hash[i] = zm->hash;
 395       if (has_inline) {
 396          if (zm->num_uniforms)
 397             prog->inline_variants |= BITFIELD_BIT(i);
 398          else
 399             prog->inline_variants &= ~BITFIELD_BIT(i);
 400       }
 401       variant_hash ^= prog->module_hash[i];
 402    }
 403
 404    if (hash_changed && state) {
 405       if (default_variants)
 406          prog->last_variant_hash = prog->default_variant_hash;
 407       else
 408          prog->last_variant_hash = variant_hash;
 409
 410       state->modules_changed = true;
 411    }
 412 }
 413
 414 static void
 415 generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
 416 {
 417    assert(!prog->modules[MESA_SHADER_VERTEX]);
 418    uint32_t variant_hash = 0;
 419    bool default_variants = true;
 420    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 421       if (!(prog->stages_present & BITFIELD_BIT(i)))
 422          continue;
 423
 424       assert(prog->shaders[i]);
 425
 426       unsigned inline_size = 0, nonseamless_size = 0;
 427       gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
 428                                 screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
 429                                 &inline_size, &nonseamless_size);
 430       struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 431                                                                      inline_size, nonseamless_size,
 432                                                                      screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
 433       state->modules[i] = zm->shader;
 434       prog->modules[i] = zm->shader;
 435       prog->module_hash[i] = zm->hash;
 436       if (zm->num_uniforms)
 437          prog->inline_variants |= BITFIELD_BIT(i);
 438       default_variants &= zm->default_variant;
 439       variant_hash ^= prog->module_hash[i];
 440    }
 441
 442    p_atomic_dec(&prog->base.reference.count);
 443    state->modules_changed = true;
 444
 445    prog->last_variant_hash = variant_hash;
 446    if (default_variants)
 447       prog->default_variant_hash = prog->last_variant_hash;
 448 }
 449
 450 static void
 451 generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
 452 {
 453    assert(!prog->modules[MESA_SHADER_VERTEX]);
 454    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 455       if (!(prog->stages_present & BITFIELD_BIT(i)))
 456          continue;
 457
 458       assert(prog->shaders[i]);
 459
 460       struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
 461       prog->modules[i] = zm->shader;
 462    }
 463
 464    p_atomic_dec(&prog->base.reference.count);
 465    state->modules_changed = true;
 466    prog->last_variant_hash = state->shader_keys_optimal.key.val;
 467 }
 468
 469 static uint32_t
 470 hash_pipeline_lib_generated_tcs(const void *key)
 471 {
 472    const struct zink_gfx_library_key *gkey = key;
 473    return gkey->optimal_key;
 474 }
 475
 476
 477 static bool
 478 equals_pipeline_lib_generated_tcs(const void *a, const void *b)
 479 {
 480    return !memcmp(a, b, sizeof(uint32_t));
 481 }
 482
 483 static uint32_t
 484 hash_pipeline_lib(const void *key)
 485 {
 486    const struct zink_gfx_library_key *gkey = key;
 487    /* remove generated tcs bits */
 488    return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
 489 }
 490
 491 static bool
 492 equals_pipeline_lib(const void *a, const void *b)
 493 {
 494    const struct zink_gfx_library_key *ak = a;
 495    const struct zink_gfx_library_key *bk = b;
 496    /* remove generated tcs bits */
 497    uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
 498    uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
 499    return val_a == val_b;
 500 }
 501
 502 uint32_t
 503 hash_gfx_input_dynamic(const void *key)
 504 {
 505    const struct zink_gfx_input_key *ikey = key;
 506    return ikey->idx;
 507 }
 508
 509 static bool
 510 equals_gfx_input_dynamic(const void *a, const void *b)
 511 {
 512    const struct zink_gfx_input_key *ikey_a = a;
 513    const struct zink_gfx_input_key *ikey_b = b;
 514    return ikey_a->idx == ikey_b->idx;
 515 }
 516
 517 uint32_t
 518 hash_gfx_input(const void *key)
 519 {
 520    const struct zink_gfx_input_key *ikey = key;
 521    if (ikey->uses_dynamic_stride)
 522       return ikey->input;
 523    return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
 524 }
 525
 526 static bool
 527 equals_gfx_input(const void *a, const void *b)
 528 {
 529    const struct zink_gfx_input_key *ikey_a = a;
 530    const struct zink_gfx_input_key *ikey_b = b;
 531    if (ikey_a->uses_dynamic_stride)
 532       return ikey_a->element_state == ikey_b->element_state &&
 533              !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
 534    return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
 535 }
 536
 537 uint32_t
 538 hash_gfx_output_ds3(const void *key)
 539 {
 540    const uint8_t *data = key;
 541    return _mesa_hash_data(data, sizeof(uint32_t));
 542 }
 543
 544 static bool
 545 equals_gfx_output_ds3(const void *a, const void *b)
 546 {
 547    const uint8_t *da = a;
 548    const uint8_t *db = b;
 549    return !memcmp(da, db, sizeof(uint32_t));
 550 }
 551
 552 uint32_t
 553 hash_gfx_output(const void *key)
 554 {
 555    const uint8_t *data = key;
 556    return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
 557 }
 558
 559 static bool
 560 equals_gfx_output(const void *a, const void *b)
 561 {
 562    const uint8_t *da = a;
 563    const uint8_t *db = b;
 564    return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
 565 }
 566
 567 ALWAYS_INLINE static void
 568 update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
 569 {
 570    struct zink_screen *screen = zink_screen(ctx->base.screen);
 571    if (screen->driconf.inline_uniforms || prog->needs_inlining)
 572       update_gfx_shader_modules(ctx, screen, prog,
 573                                 ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
 574                                 true, has_nonseamless);
 575    else
 576       update_gfx_shader_modules(ctx, screen, prog,
 577                                 ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
 578                                 false, has_nonseamless);
 579 }
 580
 581 static void
 582 update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
 583 {
 584    struct zink_screen *screen = zink_screen(ctx->base.screen);
 585    if (screen->info.have_EXT_non_seamless_cube_map)
 586       update_gfx_program_nonseamless(ctx, prog, true);
 587    else
 588       update_gfx_program_nonseamless(ctx, prog, false);
 589 }
 590
 591 void
 592 zink_gfx_program_update(struct zink_context *ctx)
 593 {
 594    if (ctx->last_vertex_stage_dirty) {
 595       gl_shader_stage pstage = ctx->last_vertex_stage->info.stage;
 596       ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
 597       memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
 598              &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
 599              sizeof(struct zink_vs_key_base));
 600       ctx->last_vertex_stage_dirty = false;
 601    }
 602    if (ctx->gfx_dirty) {
 603       struct zink_gfx_program *prog = NULL;
 604
 605       simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 606       struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 607       const uint32_t hash = ctx->gfx_hash;
 608       struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 609       /* this must be done before prog is updated */
 610       if (ctx->curr_program)
 611          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 612       if (entry) {
 613          prog = (struct zink_gfx_program*)entry->data;
 614          for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
 615             if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
 616                ctx->gfx_pipeline_state.modules[i] = prog->modules[i];
 617          }
 618          /* ensure variants are always updated if keys have changed since last use */
 619          ctx->dirty_gfx_stages |= prog->stages_present;
 620          update_gfx_program(ctx, prog);
 621       } else {
 622          ctx->dirty_gfx_stages |= ctx->shader_stages;
 623          prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
 624          zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
 625          _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
 626          prog->base.removed = false;
 627          generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
 628       }
 629       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 630       if (prog && prog != ctx->curr_program)
 631          zink_batch_reference_program(&ctx->batch, &prog->base);
 632       ctx->curr_program = prog;
 633       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 634       ctx->gfx_dirty = false;
 635    } else if (ctx->dirty_gfx_stages) {
 636       /* remove old hash */
 637       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 638       update_gfx_program(ctx, ctx->curr_program);
 639       /* apply new hash */
 640       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 641    }
 642    ctx->dirty_gfx_stages = 0;
 643 }
 644
 645 ALWAYS_INLINE static bool
 646 update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
 647 {
 648    struct zink_screen *screen = zink_screen(ctx->base.screen);
 649    if (screen->info.have_EXT_graphics_pipeline_library)
 650       util_queue_fence_wait(&prog->base.cache_fence);
 651    struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
 652    if (!zm)
 653       zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
 654
 655    bool changed = prog->modules[pstage] != zm->shader;
 656    prog->modules[pstage] = zm->shader;
 657    return changed;
 658 }
 659
 660 static void
 661 update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
 662 {
 663    const union zink_shader_key_optimal *optimal_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
 664    if (ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_bits != optimal_key->vs_bits) {
 665       assert(!prog->is_separable);
 666       bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage);
 667       ctx->gfx_pipeline_state.modules_changed |= changed;
 668    }
 669    const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
 670    if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
 671        /* always recheck shadow swizzles since they aren't directly part of the key */
 672        unlikely(shadow_needs_shader_swizzle)) {
 673       assert(!prog->is_separable);
 674       bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
 675       ctx->gfx_pipeline_state.modules_changed |= changed;
 676       if (unlikely(shadow_needs_shader_swizzle)) {
 677          struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
 678          ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t);
 679       }
 680    }
 681    if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
 682        ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
 683       assert(!prog->is_separable);
 684       bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
 685       ctx->gfx_pipeline_state.modules_changed |= changed;
 686    }
 687    prog->last_variant_hash = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 688 }
 689
 690 void
 691 zink_gfx_program_update_optimal(struct zink_context *ctx)
 692 {
 693    if (ctx->gfx_dirty) {
 694       struct zink_gfx_program *prog = NULL;
 695       ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 696       struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 697       const uint32_t hash = ctx->gfx_hash;
 698       simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 699       struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 700
 701       if (ctx->curr_program)
 702          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 703       if (entry) {
 704          prog = (struct zink_gfx_program*)entry->data;
 705          if (prog->is_separable) {
 706             /* shader variants can't be handled by separable programs: sync and compile */
 707             if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))
 708                util_queue_fence_wait(&prog->base.cache_fence);
 709             /* If the optimized linked pipeline is done compiling, swap it into place. */
 710             if (util_queue_fence_is_signalled(&prog->base.cache_fence)) {
 711                struct zink_gfx_program *real = prog->full_prog;
 712                entry->data = real;
 713                entry->key = real->shaders;
 714                real->base.removed = false;
 715                prog->full_prog = NULL;
 716                prog->base.removed = true;
 717                zink_gfx_program_reference(zink_screen(ctx->base.screen), &prog, NULL);
 718                prog = real;
 719             }
 720          }
 721          update_gfx_program_optimal(ctx, prog);
 722       } else {
 723          ctx->dirty_gfx_stages |= ctx->shader_stages;
 724          prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
 725          prog->base.removed = false;
 726          _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
 727          if (!prog->is_separable) {
 728             zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
 729             generate_gfx_program_modules_optimal(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
 730          }
 731       }
 732       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 733       if (prog && prog != ctx->curr_program)
 734          zink_batch_reference_program(&ctx->batch, &prog->base);
 735       ctx->curr_program = prog;
 736       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 737    } else if (ctx->dirty_gfx_stages) {
 738       /* remove old hash */
 739       ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 740       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 741       if (ctx->curr_program->is_separable) {
 742          struct zink_gfx_program *prog = ctx->curr_program;
 743          if (prog->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key)) {
 744             util_queue_fence_wait(&prog->base.cache_fence);
 745             /* shader variants can't be handled by separable programs: sync and compile */
 746             struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 747             const uint32_t hash = ctx->gfx_hash;
 748             simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 749             struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 750             struct zink_gfx_program *real = prog->full_prog;
 751             entry->data = real;
 752             entry->key = real->shaders;
 753             real->base.removed = false;
 754             prog->full_prog = NULL;
 755             prog->base.removed = true;
 756             zink_gfx_program_reference(zink_screen(ctx->base.screen), &prog, NULL);
 757             ctx->curr_program = real;
 758             simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 759          }
 760       }
 761       update_gfx_program_optimal(ctx, ctx->curr_program);
 762       /* apply new hash */
 763       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 764    }
 765    ctx->dirty_gfx_stages = 0;
 766    ctx->gfx_dirty = false;
 767    ctx->last_vertex_stage_dirty = false;
 768 }
 769
 770 static void
 771 optimized_compile_job(void *data, void *gdata, int thread_index)
 772 {
 773    struct zink_gfx_pipeline_cache_entry *pc_entry = data;
 774    struct zink_screen *screen = gdata;
 775    VkPipeline pipeline;
 776    if (pc_entry->gkey)
 777       pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->ikey->pipeline, &pc_entry->gkey->pipeline, 1, pc_entry->okey->pipeline, true);
 778    else
 779       pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
 780    if (pipeline) {
 781       pc_entry->unoptimized_pipeline = pc_entry->pipeline;
 782       pc_entry->pipeline = pipeline;
 783    }
 784 }
 785
 786 void
 787 zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
 788 {
 789    struct zink_screen *screen = zink_screen(ctx->base.screen);
 790    if (screen->driver_workarounds.disable_optimized_compile)
 791       return;
 792    util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
 793 }
 794
 795 static void
 796 update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
 797 {
 798    struct zink_screen *screen = zink_screen(ctx->base.screen);
 799    struct zink_shader *zs = comp->shader;
 800    VkShaderModule mod;
 801    struct zink_shader_module *zm = NULL;
 802    unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0;
 803    struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
 804    ASSERTED bool check_robustness = screen->driver_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
 805    assert(zink_cs_key(key)->robust_access == check_robustness);
 806
 807    if (ctx && zs->info.num_inlinable_uniforms &&
 808        ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
 809       if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
 810          inline_size = zs->info.num_inlinable_uniforms;
 811       else
 812          key->inline_uniforms = false;
 813    }
 814    if (key->base.nonseamless_cube_mask)
 815       nonseamless_size = sizeof(uint32_t);
 816    if (key->base.needs_zs_shader_swizzle)
 817       zs_swizzle_size = sizeof(struct zink_zs_swizzle_key);
 818
 819    if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) {
 820       struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
 821       unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 822       struct zink_shader_module **pzm = shader_cache->data;
 823       for (unsigned i = 0; i < count; i++) {
 824          struct zink_shader_module *iter = pzm[i];
 825          if (!shader_key_matches(iter, key, inline_size,
 826                                  screen->driconf.inline_uniforms,
 827                                  screen->info.have_EXT_non_seamless_cube_map))
 828             continue;
 829          if (unlikely(zs_swizzle_size)) {
 830             /* zs swizzle data needs a manual compare since it's so fat */
 831             if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t),
 832                        &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size))
 833                continue;
 834          }
 835          if (i > 0) {
 836             struct zink_shader_module *zero = pzm[0];
 837             pzm[0] = iter;
 838             pzm[i] = zero;
 839          }
 840          zm = iter;
 841       }
 842    } else {
 843       zm = comp->module;
 844    }
 845
 846    if (!zm) {
 847       zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size);
 848       if (!zm) {
 849          return;
 850       }
 851       mod = zink_shader_compile(screen, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL);
 852       if (!mod) {
 853          FREE(zm);
 854          return;
 855       }
 856       zm->shader = mod;
 857       zm->num_uniforms = inline_size;
 858       zm->key_size = key->size;
 859       memcpy(zm->key, key, key->size);
 860       zm->has_nonseamless = !!nonseamless_size;
 861       zm->needs_zs_shader_swizzle = !!zs_swizzle_size;
 862       assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size);
 863       if (nonseamless_size)
 864          memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
 865       if (inline_size)
 866          memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
 867       if (zs_swizzle_size)
 868          memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size);
 869
 870       zm->hash = shader_module_hash(zm);
 871       zm->default_variant = false;
 872       if (inline_size)
 873          comp->inlined_variant_count++;
 874
 875       /* this is otherwise the default variant, which is stored as comp->module */
 876       if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size)
 877          util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
 878    }
 879    if (comp->curr == zm)
 880       return;
 881    ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
 882    comp->curr = zm;
 883    ctx->compute_pipeline_state.module_hash = zm->hash;
 884    ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
 885    ctx->compute_pipeline_state.module_changed = true;
 886 }
 887
 888 void
 889 zink_update_compute_program(struct zink_context *ctx)
 890 {
 891    util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
 892    update_cs_shader_module(ctx, ctx->curr_compute);
 893 }
 894
 895 VkPipelineLayout
 896 zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
 897 {
 898    VkPipelineLayoutCreateInfo plci = {0};
 899    plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
 900    plci.flags = flags;
 901
 902    plci.pSetLayouts = dsl;
 903    plci.setLayoutCount = num_dsl;
 904
 905    VkPushConstantRange pcr;
 906    if (!is_compute) {
 907       pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
 908       pcr.offset = 0;
 909       pcr.size = sizeof(struct zink_gfx_push_constant);
 910       plci.pushConstantRangeCount = 1;
 911       plci.pPushConstantRanges = &pcr;
 912    }
 913
 914    VkPipelineLayout layout;
 915    VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
 916    if (result != VK_SUCCESS) {
 917       mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
 918       return VK_NULL_HANDLE;
 919    }
 920
 921    return layout;
 922 }
 923
 924 static void *
 925 create_program(struct zink_context *ctx, bool is_compute)
 926 {
 927    struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
 928    if (!pg)
 929       return NULL;
 930
 931    pipe_reference_init(&pg->reference, 1);
 932    util_queue_fence_init(&pg->cache_fence);
 933    pg->is_compute = is_compute;
 934    pg->ctx = ctx;
 935    return (void*)pg;
 936 }
 937
 938 static void
 939 assign_io(struct zink_screen *screen,
 940           nir_shader *shaders[ZINK_GFX_SHADER_COUNT])
 941 {
 942    for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
 943       nir_shader *producer = shaders[i];
 944       for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
 945          nir_shader *consumer = shaders[j];
 946          if (!consumer)
 947             continue;
 948          zink_compiler_assign_io(screen, producer, consumer);
 949          i = j;
 950          break;
 951       }
 952    }
 953 }
 954
 955 void
 956 zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
 957 {
 958    if (!p_atomic_dec_zero(&libs->refcount))
 959       return;
 960
 961    simple_mtx_destroy(&libs->lock);
 962    set_foreach_remove(&libs->libs, he) {
 963       struct zink_gfx_library_key *gkey = (void*)he->key;
 964       VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
 965       FREE(gkey);
 966    }
 967    ralloc_free(libs->libs.table);
 968    FREE(libs);
 969 }
 970
 971 static struct zink_gfx_lib_cache *
 972 create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
 973 {
 974    struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache);
 975    libs->stages_present = prog->stages_present;
 976    simple_mtx_init(&libs->lock, mtx_plain);
 977    if (generated_tcs)
 978       _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
 979    else
 980       _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib);
 981    return libs;
 982 }
 983
 984 static struct zink_gfx_lib_cache *
 985 find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
 986 {
 987    unsigned stages_present = prog->stages_present;
 988    bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
 989    if (generated_tcs)
 990       stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
 991    unsigned idx = zink_program_cache_stages(stages_present);
 992    struct set *ht = &screen->pipeline_libs[idx];
 993    const uint32_t hash = prog->gfx_hash;
 994
 995    simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
 996    bool found = false;
 997    struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
 998    struct zink_gfx_lib_cache *libs;
 999    if (found) {
1000       libs = (void*)entry->key;
1001    } else {
1002       libs = create_lib_cache(prog, generated_tcs);
1003       memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
1004       entry->key = libs;
1005       unsigned refs = 0;
1006       for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
1007          if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
1008             simple_mtx_lock(&prog->shaders[i]->lock);
1009             util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
1010             simple_mtx_unlock(&prog->shaders[i]->lock);
1011             refs++;
1012          }
1013       }
1014       p_atomic_set(&libs->refcount, refs);
1015    }
1016    simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
1017    return libs;
1018 }
1019
1020 struct zink_gfx_program *
1021 zink_create_gfx_program(struct zink_context *ctx,
1022                         struct zink_shader **stages,
1023                         unsigned vertices_per_patch,
1024                         uint32_t gfx_hash)
1025 {
1026    struct zink_screen *screen = zink_screen(ctx->base.screen);
1027    struct zink_gfx_program *prog = create_program(ctx, false);
1028    if (!prog)
1029       goto fail;
1030
1031    prog->ctx = ctx;
1032    prog->gfx_hash = gfx_hash;
1033    prog->base.removed = true;
1034    prog->optimal_keys = screen->optimal_keys;
1035
1036    nir_shader *nir[ZINK_GFX_SHADER_COUNT];
1037
1038    prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] &&
1039                          prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
1040    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1041       util_dynarray_init(&prog->shader_cache[i][0][0], prog);
1042       util_dynarray_init(&prog->shader_cache[i][0][1], prog);
1043       util_dynarray_init(&prog->shader_cache[i][1][0], prog);
1044       util_dynarray_init(&prog->shader_cache[i][1][1], prog);
1045       if (stages[i]) {
1046          prog->shaders[i] = stages[i];
1047          prog->stages_present |= BITFIELD_BIT(i);
1048          if (i != MESA_SHADER_FRAGMENT)
1049             prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
1050          prog->needs_inlining |= prog->shaders[i]->needs_inlining;
1051          nir[i] = zink_shader_deserialize(screen, stages[i]);
1052       } else {
1053          nir[i] = NULL;
1054       }
1055    }
1056    if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
1057       prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs =
1058       prog->shaders[MESA_SHADER_TESS_CTRL] =
1059         zink_shader_tcs_create(screen, nir[MESA_SHADER_TESS_EVAL], vertices_per_patch, &nir[MESA_SHADER_TESS_CTRL]);
1060       prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
1061    }
1062    prog->stages_remaining = prog->stages_present;
1063
1064    assign_io(screen, nir);
1065    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1066       if (nir[i])
1067          zink_shader_serialize_blob(nir[i], &prog->blobs[i]);
1068       ralloc_free(nir[i]);
1069    }
1070
1071
1072    if (stages[MESA_SHADER_GEOMETRY])
1073       prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
1074    else if (stages[MESA_SHADER_TESS_EVAL])
1075       prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
1076    else
1077       prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
1078
1079    for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
1080       for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
1081          _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
1082          /* only need first 3/4 for point/line/tri/patch */
1083          if (screen->info.have_EXT_extended_dynamic_state &&
1084              i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
1085             break;
1086       }
1087    }
1088
1089    if (screen->optimal_keys)
1090       prog->libs = find_or_create_lib_cache(screen, prog);
1091
1092    struct mesa_sha1 sctx;
1093    _mesa_sha1_init(&sctx);
1094    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1095       if (prog->shaders[i]) {
1096          simple_mtx_lock(&prog->shaders[i]->lock);
1097          _mesa_set_add(prog->shaders[i]->programs, prog);
1098          simple_mtx_unlock(&prog->shaders[i]->lock);
1099          zink_gfx_program_reference(screen, NULL, prog);
1100          _mesa_sha1_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
1101       }
1102    }
1103    _mesa_sha1_final(&sctx, prog->base.sha1);
1104
1105    if (!zink_descriptor_program_init(ctx, &prog->base))
1106       goto fail;
1107
1108    return prog;
1109
1110 fail:
1111    if (prog)
1112       zink_destroy_gfx_program(screen, prog);
1113    return NULL;
1114 }
1115
1116 /* Creates a replacement, optimized zink_gfx_program for this set of separate shaders, which will
1117  * be swapped in in place of the fast-linked separable program once it's done compiling.
1118  */
1119 static void
1120 create_linked_separable_job(void *data, void *gdata, int thread_index)
1121 {
1122    struct zink_gfx_program *prog = data;
1123    prog->full_prog = zink_create_gfx_program(prog->ctx, prog->shaders, 0, prog->gfx_hash);
1124    precompile_job(prog->full_prog, gdata, thread_index);
1125 }
1126
1127 struct zink_gfx_program *
1128 create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
1129 {
1130    struct zink_screen *screen = zink_screen(ctx->base.screen);
1131    unsigned shader_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT);
1132    /* filter cases that need real pipelines */
1133    if (ctx->shader_stages != shader_stages ||
1134        !stages[MESA_SHADER_VERTEX]->precompile.obj.mod || !stages[MESA_SHADER_FRAGMENT]->precompile.obj.mod ||
1135        /* TODO: maybe try variants? grimace */
1136        !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
1137        !zink_can_use_pipeline_libs(ctx))
1138       return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
1139    /* ensure async gpl creation is done */
1140    util_queue_fence_wait(&stages[MESA_SHADER_VERTEX]->precompile.fence);
1141    util_queue_fence_wait(&stages[MESA_SHADER_FRAGMENT]->precompile.fence);
1142
1143    struct zink_gfx_program *prog = create_program(ctx, false);
1144    if (!prog)
1145       goto fail;
1146
1147    prog->ctx = ctx;
1148    prog->is_separable = true;
1149    prog->gfx_hash = ctx->gfx_hash;
1150
1151    prog->shaders[MESA_SHADER_VERTEX] = stages[MESA_SHADER_VERTEX];
1152    prog->stages_remaining = prog->stages_present = shader_stages;
1153    prog->shaders[MESA_SHADER_FRAGMENT] = stages[MESA_SHADER_FRAGMENT];
1154    prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
1155
1156    if (!screen->info.have_EXT_shader_object) {
1157       prog->libs = create_lib_cache(prog, false);
1158       /* this libs cache is owned by the program */
1159       p_atomic_set(&prog->libs->refcount, 1);
1160    }
1161
1162    unsigned refs = 0;
1163    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1164       if (prog->shaders[i]) {
1165          simple_mtx_lock(&prog->shaders[i]->lock);
1166          _mesa_set_add(prog->shaders[i]->programs, prog);
1167          simple_mtx_unlock(&prog->shaders[i]->lock);
1168          if (screen->info.have_EXT_shader_object) {
1169             prog->objects[i] = stages[i]->precompile.obj.obj;
1170          }
1171          refs++;
1172       }
1173    }
1174    /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
1175    * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here.
1176    */
1177    p_atomic_add(&prog->base.reference.count, refs);
1178
1179    for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
1180       for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
1181          _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
1182          /* only need first 3/4 for point/line/tri/patch */
1183          if (screen->info.have_EXT_extended_dynamic_state &&
1184              i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
1185             break;
1186       }
1187    }
1188
1189    if (prog->shaders[MESA_SHADER_VERTEX]->precompile.dsl) {
1190       prog->base.dd.binding_usage |= BITFIELD_BIT(0);
1191       prog->base.dsl[0] = prog->shaders[MESA_SHADER_VERTEX]->precompile.dsl;
1192       prog->base.num_dsl = 1;
1193    }
1194    if (prog->shaders[MESA_SHADER_FRAGMENT]->precompile.dsl) {
1195       prog->base.dd.binding_usage |= BITFIELD_BIT(1);
1196       prog->base.dsl[1] = prog->shaders[MESA_SHADER_FRAGMENT]->precompile.dsl;
1197       /* guarantee a null dsl if vs doesn't have descriptors */
1198       prog->base.num_dsl = 2;
1199    }
1200    prog->base.dd.bindless = prog->shaders[MESA_SHADER_VERTEX]->bindless | prog->shaders[MESA_SHADER_FRAGMENT]->bindless;
1201    if (prog->base.dd.bindless) {
1202       prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
1203       prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
1204    }
1205    prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
1206
1207    prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
1208
1209    if (!screen->info.have_EXT_shader_object) {
1210       VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
1211       struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
1212       if (!gkey) {
1213          mesa_loge("ZINK: failed to allocate gkey!");
1214          goto fail;
1215       }
1216       gkey->optimal_key = prog->last_variant_hash;
1217       assert(gkey->optimal_key);
1218       gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false);
1219       _mesa_set_add(&prog->libs->libs, gkey);
1220    }
1221
1222    util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);
1223
1224    return prog;
1225 fail:
1226    if (prog)
1227       zink_destroy_gfx_program(screen, prog);
1228    return NULL;
1229 }
1230
1231 static uint32_t
1232 hash_compute_pipeline_state_local_size(const void *key)
1233 {
1234    const struct zink_compute_pipeline_state *state = key;
1235    uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1236    hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
1237    return hash;
1238 }
1239
1240 static uint32_t
1241 hash_compute_pipeline_state(const void *key)
1242 {
1243    const struct zink_compute_pipeline_state *state = key;
1244    return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1245 }
1246
1247 void
1248 zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3])
1249 {
1250    if (comp->use_local_size) {
1251       for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
1252          if (ctx->compute_pipeline_state.local_size[i] != block[i])
1253             ctx->compute_pipeline_state.dirty = true;
1254          ctx->compute_pipeline_state.local_size[i] = block[i];
1255       }
1256    }
1257 }
1258
1259 static bool
1260 equals_compute_pipeline_state(const void *a, const void *b)
1261 {
1262    const struct zink_compute_pipeline_state *sa = a;
1263    const struct zink_compute_pipeline_state *sb = b;
1264    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1265           sa->module == sb->module;
1266 }
1267
1268 static bool
1269 equals_compute_pipeline_state_local_size(const void *a, const void *b)
1270 {
1271    const struct zink_compute_pipeline_state *sa = a;
1272    const struct zink_compute_pipeline_state *sb = b;
1273    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1274           !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
1275           sa->module == sb->module;
1276 }
1277
1278 static void
1279 precompile_compute_job(void *data, void *gdata, int thread_index)
1280 {
1281    struct zink_compute_program *comp = data;
1282    struct zink_screen *screen = gdata;
1283
1284    comp->shader = zink_shader_create(screen, comp->nir, NULL);
1285    comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
1286    assert(comp->module);
1287    comp->module->shader = zink_shader_compile(screen, comp->shader, comp->nir, NULL, NULL);
1288    /* comp->nir will be freed by zink_shader_compile */
1289    comp->nir = NULL;
1290    assert(comp->module->shader);
1291    util_dynarray_init(&comp->shader_cache[0], comp);
1292    util_dynarray_init(&comp->shader_cache[1], comp);
1293
1294    struct mesa_sha1 sha1_ctx;
1295    _mesa_sha1_init(&sha1_ctx);
1296    _mesa_sha1_update(&sha1_ctx, comp->shader->blob.data, comp->shader->blob.size);
1297    _mesa_sha1_final(&sha1_ctx, comp->base.sha1);
1298
1299    zink_descriptor_program_init(comp->base.ctx, &comp->base);
1300
1301    zink_screen_get_pipeline_cache(screen, &comp->base, true);
1302    if (comp->base.can_precompile)
1303       comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
1304    if (comp->base_pipeline)
1305       zink_screen_update_pipeline_cache(screen, &comp->base, true);
1306 }
1307
1308 static struct zink_compute_program *
1309 create_compute_program(struct zink_context *ctx, nir_shader *nir)
1310 {
1311    struct zink_screen *screen = zink_screen(ctx->base.screen);
1312    struct zink_compute_program *comp = create_program(ctx, true);
1313    if (!comp)
1314       return NULL;
1315    comp->nir = nir;
1316    comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms;
1317
1318    comp->use_local_size = !(nir->info.workgroup_size[0] ||
1319                             nir->info.workgroup_size[1] ||
1320                             nir->info.workgroup_size[2]);
1321    comp->base.can_precompile = !comp->use_local_size &&
1322                                (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
1323                                (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
1324    _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
1325                                                        equals_compute_pipeline_state_local_size :
1326                                                        equals_compute_pipeline_state);
1327    util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
1328                       precompile_compute_job, NULL, 0);
1329    return comp;
1330 }
1331
1332 uint32_t
1333 zink_program_get_descriptor_usage(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type)
1334 {
1335    struct zink_shader *zs = NULL;
1336    switch (stage) {
1337    case MESA_SHADER_VERTEX:
1338    case MESA_SHADER_TESS_CTRL:
1339    case MESA_SHADER_TESS_EVAL:
1340    case MESA_SHADER_GEOMETRY:
1341    case MESA_SHADER_FRAGMENT:
1342       zs = ctx->gfx_stages[stage];
1343       break;
1344    case MESA_SHADER_COMPUTE: {
1345       zs = ctx->curr_compute->shader;
1346       break;
1347    }
1348    default:
1349       unreachable("unknown shader type");
1350    }
1351    if (!zs)
1352       return 0;
1353    switch (type) {
1354    case ZINK_DESCRIPTOR_TYPE_UBO:
1355       return zs->ubos_used;
1356    case ZINK_DESCRIPTOR_TYPE_SSBO:
1357       return zs->ssbos_used;
1358    case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
1359       return BITSET_TEST_RANGE(zs->info.textures_used, 0, PIPE_MAX_SAMPLERS - 1);
1360    case ZINK_DESCRIPTOR_TYPE_IMAGE:
1361       return BITSET_TEST_RANGE(zs->info.images_used, 0, PIPE_MAX_SAMPLERS - 1);
1362    default:
1363       unreachable("unknown descriptor type!");
1364    }
1365    return 0;
1366 }
1367
1368 bool
1369 zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
1370 {
1371    struct zink_shader *zs = NULL;
1372    switch (stage) {
1373    case MESA_SHADER_VERTEX:
1374    case MESA_SHADER_TESS_CTRL:
1375    case MESA_SHADER_TESS_EVAL:
1376    case MESA_SHADER_GEOMETRY:
1377    case MESA_SHADER_FRAGMENT:
1378       zs = ctx->gfx_stages[stage];
1379       break;
1380    case MESA_SHADER_COMPUTE: {
1381       zs = ctx->curr_compute->shader;
1382       break;
1383    }
1384    default:
1385       unreachable("unknown shader type");
1386    }
1387    if (!zs)
1388       return false;
1389    return zink_shader_descriptor_is_buffer(zs, type, i);
1390 }
1391
1392 static unsigned
1393 get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
1394 {
1395    switch (type) {
1396    case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
1397       return !!zs->has_uniforms;
1398    case ZINK_DESCRIPTOR_TYPE_UBO:
1399    case ZINK_DESCRIPTOR_TYPE_SSBO:
1400       return zs->num_bindings[type];
1401    default:
1402       break;
1403    }
1404    unsigned num_bindings = 0;
1405    for (int i = 0; i < zs->num_bindings[type]; i++)
1406       num_bindings += zs->bindings[type][i].size;
1407    return num_bindings;
1408 }
1409
1410 unsigned
1411 zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
1412 {
1413    unsigned num_bindings = 0;
1414    if (pg->is_compute) {
1415       struct zink_compute_program *comp = (void*)pg;
1416       return get_num_bindings(comp->shader, type);
1417    }
1418    struct zink_gfx_program *prog = (void*)pg;
1419    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1420       if (prog->shaders[i])
1421          num_bindings += get_num_bindings(prog->shaders[i], type);
1422    }
1423    return num_bindings;
1424 }
1425
1426 unsigned
1427 zink_program_num_bindings(const struct zink_program *pg)
1428 {
1429    unsigned num_bindings = 0;
1430    for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
1431       num_bindings += zink_program_num_bindings_typed(pg, i);
1432    return num_bindings;
1433 }
1434
1435 static void
1436 deinit_program(struct zink_screen *screen, struct zink_program *pg)
1437 {
1438    util_queue_fence_wait(&pg->cache_fence);
1439    if (pg->layout)
1440       VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);
1441
1442    if (pg->pipeline_cache)
1443       VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
1444    zink_descriptor_program_deinit(screen, pg);
1445 }
1446
1447 void
1448 zink_destroy_gfx_program(struct zink_screen *screen,
1449                          struct zink_gfx_program *prog)
1450 {
1451    unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
1452    if (screen->info.have_EXT_extended_dynamic_state) {
1453       /* only need first 3/4 for point/line/tri/patch */
1454       if ((prog->stages_present &
1455           (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
1456           BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
1457          max_idx = 4;
1458       else
1459          max_idx = 3;
1460       max_idx++;
1461    }
1462
1463    if (prog->is_separable)
1464       zink_gfx_program_reference(screen, &prog->full_prog, NULL);
1465    for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
1466       for (int i = 0; i < max_idx; ++i) {
1467          hash_table_foreach(&prog->pipelines[r][i], entry) {
1468             struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
1469
1470             util_queue_fence_wait(&pc_entry->fence);
1471             VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1472             VKSCR(DestroyPipeline)(screen->dev, pc_entry->unoptimized_pipeline, NULL);
1473             free(pc_entry);
1474          }
1475       }
1476    }
1477
1478    deinit_program(screen, &prog->base);
1479
1480    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1481       if (prog->shaders[i]) {
1482          _mesa_set_remove_key(prog->shaders[i]->programs, prog);
1483          prog->shaders[i] = NULL;
1484       }
1485       if (!prog->is_separable) {
1486          destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
1487          destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
1488          destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
1489          destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
1490          blob_finish(&prog->blobs[i]);
1491       }
1492    }
1493    if (prog->is_separable && prog->libs)
1494       zink_gfx_lib_cache_unref(screen, prog->libs);
1495
1496    ralloc_free(prog);
1497 }
1498
1499 void
1500 zink_destroy_compute_program(struct zink_screen *screen,
1501                              struct zink_compute_program *comp)
1502 {
1503    deinit_program(screen, &comp->base);
1504
1505    assert(comp->shader);
1506    assert(!comp->shader->spirv);
1507
1508    zink_shader_free(screen, comp->shader);
1509
1510    destroy_shader_cache(screen, &comp->shader_cache[0]);
1511    destroy_shader_cache(screen, &comp->shader_cache[1]);
1512
1513    hash_table_foreach(&comp->pipelines, entry) {
1514       struct compute_pipeline_cache_entry *pc_entry = entry->data;
1515
1516       VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1517       free(pc_entry);
1518    }
1519    VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
1520    VKSCR(DestroyShaderModule)(screen->dev, comp->module->shader, NULL);
1521    free(comp->module);
1522
1523    ralloc_free(comp);
1524 }
1525
1526 ALWAYS_INLINE static bool
1527 compute_can_shortcut(const struct zink_compute_program *comp)
1528 {
1529    return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
1530 }
1531
1532 VkPipeline
1533 zink_get_compute_pipeline(struct zink_screen *screen,
1534                       struct zink_compute_program *comp,
1535                       struct zink_compute_pipeline_state *state)
1536 {
1537    struct hash_entry *entry = NULL;
1538
1539    if (!state->dirty && !state->module_changed)
1540       return state->pipeline;
1541    if (state->dirty) {
1542       if (state->pipeline) //avoid on first hash
1543          state->final_hash ^= state->hash;
1544       if (comp->use_local_size)
1545          state->hash = hash_compute_pipeline_state_local_size(state);
1546       else
1547          state->hash = hash_compute_pipeline_state(state);
1548       state->dirty = false;
1549       state->final_hash ^= state->hash;
1550    }
1551
1552    util_queue_fence_wait(&comp->base.cache_fence);
1553    if (comp->base_pipeline && compute_can_shortcut(comp)) {
1554       state->pipeline = comp->base_pipeline;
1555       return state->pipeline;
1556    }
1557    entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
1558
1559    if (!entry) {
1560       VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);
1561
1562       if (pipeline == VK_NULL_HANDLE)
1563          return VK_NULL_HANDLE;
1564
1565       zink_screen_update_pipeline_cache(screen, &comp->base, false);
1566       if (compute_can_shortcut(comp)) {
1567          /* don't add base pipeline to cache */
1568          state->pipeline = comp->base_pipeline = pipeline;
1569          return state->pipeline;
1570       }
1571
1572       struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
1573       if (!pc_entry)
1574          return VK_NULL_HANDLE;
1575
1576       memcpy(&pc_entry->state, state, sizeof(*state));
1577       pc_entry->pipeline = pipeline;
1578
1579       entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
1580       assert(entry);
1581    }
1582
1583    struct compute_pipeline_cache_entry *cache_entry = entry->data;
1584    state->pipeline = cache_entry->pipeline;
1585    return state->pipeline;
1586 }
1587
1588 static void
1589 bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
1590 {
1591    if (shader && shader->info.num_inlinable_uniforms)
1592       ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
1593    else
1594       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);
1595
1596    if (ctx->gfx_stages[stage])
1597       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1598
1599    if (!shader && stage == MESA_SHADER_GEOMETRY) {
1600       ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
1601       ctx->is_generated_gs_bound = false;
1602    }
1603
1604    ctx->gfx_stages[stage] = shader;
1605    ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
1606    ctx->gfx_pipeline_state.modules_changed = true;
1607    if (shader) {
1608       ctx->shader_stages |= BITFIELD_BIT(stage);
1609       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1610    } else {
1611       ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
1612       if (ctx->curr_program)
1613          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
1614       ctx->curr_program = NULL;
1615       ctx->shader_stages &= ~BITFIELD_BIT(stage);
1616    }
1617 }
1618
1619 static enum pipe_prim_type
1620 gs_output_to_reduced_prim_type(struct shader_info *info)
1621 {
1622    switch (info->gs.output_primitive) {
1623    case SHADER_PRIM_POINTS:
1624       return PIPE_PRIM_POINTS;
1625
1626    case SHADER_PRIM_LINES:
1627    case SHADER_PRIM_LINE_LOOP:
1628    case SHADER_PRIM_LINE_STRIP:
1629    case SHADER_PRIM_LINES_ADJACENCY:
1630    case SHADER_PRIM_LINE_STRIP_ADJACENCY:
1631       return PIPE_PRIM_LINES;
1632
1633    case SHADER_PRIM_TRIANGLES:
1634    case SHADER_PRIM_TRIANGLE_STRIP:
1635    case SHADER_PRIM_TRIANGLE_FAN:
1636    case SHADER_PRIM_TRIANGLES_ADJACENCY:
1637    case SHADER_PRIM_TRIANGLE_STRIP_ADJACENCY:
1638       return PIPE_PRIM_TRIANGLES;
1639
1640    default:
1641       unreachable("unexpected output primitive type");
1642    }
1643 }
1644
1645 static enum pipe_prim_type
1646 update_rast_prim(struct zink_shader *shader)
1647 {
1648    struct shader_info *info = &shader->info;
1649    if (info->stage == MESA_SHADER_GEOMETRY)
1650       return gs_output_to_reduced_prim_type(info);
1651    else if (info->stage == MESA_SHADER_TESS_EVAL) {
1652       if (info->tess.point_mode)
1653          return PIPE_PRIM_POINTS;
1654       else {
1655          switch (info->tess._primitive_mode) {
1656          case TESS_PRIMITIVE_ISOLINES:
1657             return PIPE_PRIM_LINES;
1658          case TESS_PRIMITIVE_TRIANGLES:
1659          case TESS_PRIMITIVE_QUADS:
1660             return PIPE_PRIM_TRIANGLES;
1661          default:
1662             return PIPE_PRIM_MAX;
1663          }
1664       }
1665    }
1666    return PIPE_PRIM_MAX;
1667 }
1668
1669 static void
1670 unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
1671 {
1672    if (prev_shader->non_fs.is_generated)
1673       ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
1674
1675    if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
1676        ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent ==
1677        prev_shader) {
1678       bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
1679    }
1680 }
1681
1682 static void
1683 bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
1684 {
1685    if (prev_shader && stage < MESA_SHADER_GEOMETRY)
1686       unbind_generated_gs(ctx, stage, prev_shader);
1687
1688    gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES;
1689    if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1690       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
1691    else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1692       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
1693    else
1694       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
1695    gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX;
1696
1697    /* update rast_prim */
1698    ctx->gfx_pipeline_state.shader_rast_prim =
1699       ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
1700                                PIPE_PRIM_MAX;
1701
1702    if (old != current) {
1703       if (!zink_screen(ctx->base.screen)->optimal_keys) {
1704          if (old != MESA_SHADER_STAGES) {
1705             memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1706             ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
1707          } else {
1708             /* always unset vertex shader values when changing to a non-vs last stage */
1709             memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1710          }
1711       }
1712
1713       unsigned num_viewports = ctx->vp_state.num_viewports;
1714       struct zink_screen *screen = zink_screen(ctx->base.screen);
1715       /* number of enabled viewports is based on whether last vertex stage writes viewport index */
1716       if (ctx->last_vertex_stage) {
1717          if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
1718             ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
1719          else
1720             ctx->vp_state.num_viewports = 1;
1721       } else {
1722          ctx->vp_state.num_viewports = 1;
1723       }
1724       ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
1725       if (!screen->info.have_EXT_extended_dynamic_state) {
1726          if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
1727             ctx->gfx_pipeline_state.dirty = true;
1728          ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
1729       }
1730       ctx->last_vertex_stage_dirty = true;
1731    }
1732 }
1733
1734 static void
1735 zink_bind_vs_state(struct pipe_context *pctx,
1736                    void *cso)
1737 {
1738    struct zink_context *ctx = zink_context(pctx);
1739    if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
1740       return;
1741    struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX];
1742    bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
1743    bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader);
1744    if (cso) {
1745       struct zink_shader *zs = cso;
1746       ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1747       ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
1748    } else {
1749       ctx->shader_reads_drawid = false;
1750       ctx->shader_reads_basevertex = false;
1751    }
1752 }
1753
1754 /* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
1755  * in GL, samples==1 means ignore gl_SampleMask[]
1756  * in VK, gl_SampleMask[] is never ignored
1757  */
1758 void
1759 zink_update_fs_key_samples(struct zink_context *ctx)
1760 {
1761    if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1762       return;
1763    shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
1764    if (info->outputs_written & (1 << FRAG_RESULT_SAMPLE_MASK)) {
1765       bool samples = zink_get_fs_base_key(ctx)->samples;
1766       if (samples != (ctx->fb_state.samples > 1))
1767          zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
1768    }
1769 }
1770
1771 void zink_update_gs_key_rectangular_line(struct zink_context *ctx)
1772 {
1773    bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular;
1774    if (line_rectangular != ctx->rast_state->base.line_rectangular)
1775       zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular;
1776 }
1777
1778 static void
1779 zink_bind_fs_state(struct pipe_context *pctx,
1780                    void *cso)
1781 {
1782    struct zink_context *ctx = zink_context(pctx);
1783    if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1784       return;
1785    unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
1786    bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
1787    ctx->fbfetch_outputs = 0;
1788    if (cso) {
1789       shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
1790       if (info->fs.uses_fbfetch_output) {
1791          if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL)))
1792             ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
1793          ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0;
1794       }
1795       zink_update_fs_key_samples(ctx);
1796       if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
1797          if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output)
1798             ctx->gfx_pipeline_state.dirty = true;
1799          ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output;
1800       }
1801       zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false);
1802       if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask &&
1803           !zink_screen(pctx->screen)->driver_workarounds.needs_zs_shader_swizzle)
1804          zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
1805       if (!ctx->track_renderpasses && !ctx->blitting)
1806          zink_parse_tc_info(ctx);
1807    }
1808    zink_update_fbfetch(ctx);
1809 }
1810
1811 static void
1812 zink_bind_gs_state(struct pipe_context *pctx,
1813                    void *cso)
1814 {
1815    struct zink_context *ctx = zink_context(pctx);
1816    if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1817       return;
1818    bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
1819    bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
1820 }
1821
1822 static void
1823 zink_bind_tcs_state(struct pipe_context *pctx,
1824                    void *cso)
1825 {
1826    bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
1827 }
1828
1829 static void
1830 zink_bind_tes_state(struct pipe_context *pctx,
1831                    void *cso)
1832 {
1833    struct zink_context *ctx = zink_context(pctx);
1834    if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1835       return;
1836    if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
1837       if (!cso) {
1838          /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */
1839          if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
1840             ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
1841       }
1842    }
1843    struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
1844    bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
1845    bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader);
1846 }
1847
1848 static void *
1849 zink_create_cs_state(struct pipe_context *pctx,
1850                      const struct pipe_compute_state *shader)
1851 {
1852    struct nir_shader *nir;
1853    if (shader->ir_type != PIPE_SHADER_IR_NIR)
1854       nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
1855    else
1856       nir = (struct nir_shader *)shader->prog;
1857
1858    if (nir->info.uses_bindless)
1859       zink_descriptors_init_bindless(zink_context(pctx));
1860
1861    return create_compute_program(zink_context(pctx), nir);
1862 }
1863
1864 static void
1865 zink_bind_cs_state(struct pipe_context *pctx,
1866                    void *cso)
1867 {
1868    struct zink_context *ctx = zink_context(pctx);
1869    struct zink_compute_program *comp = cso;
1870    if (comp && comp->num_inlinable_uniforms)
1871       ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
1872    else
1873       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);
1874
1875    if (ctx->curr_compute) {
1876       zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
1877       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
1878       ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
1879       ctx->compute_pipeline_state.module_hash = 0;
1880    }
1881    ctx->compute_pipeline_state.dirty = true;
1882    ctx->curr_compute = comp;
1883    if (comp && comp != ctx->curr_compute) {
1884       ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
1885       if (util_queue_fence_is_signalled(&comp->base.cache_fence))
1886          ctx->compute_pipeline_state.module = ctx->curr_compute->curr->shader;
1887       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
1888       if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
1889          ctx->compute_dirty = true;
1890    }
1891    zink_select_launch_grid(ctx);
1892 }
1893
1894 static void
1895 zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
1896 {
1897    struct zink_compute_program *comp = cso;
1898    zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
1899 }
1900
1901 void
1902 zink_delete_shader_state(struct pipe_context *pctx, void *cso)
1903 {
1904    zink_gfx_shader_free(zink_screen(pctx->screen), cso);
1905 }
1906
1907 void *
1908 zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
1909 {
1910    nir_shader *nir;
1911    if (shader->type != PIPE_SHADER_IR_NIR)
1912       nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
1913    else
1914       nir = (struct nir_shader *)shader->ir.nir;
1915
1916    if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
1917       zink_descriptor_util_init_fbfetch(zink_context(pctx));
1918    if (nir->info.uses_bindless)
1919       zink_descriptors_init_bindless(zink_context(pctx));
1920
1921    void *ret = zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output);
1922    ralloc_free(nir);
1923    return ret;
1924 }
1925
1926 static void
1927 zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
1928 {
1929    struct zink_screen *screen = zink_screen(pctx->screen);
1930    util_shader_reference(pctx, &screen->shaders, &cso, NULL);
1931 }
1932
1933 static void *
1934 zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
1935 {
1936    bool cache_hit;
1937    struct zink_screen *screen = zink_screen(pctx->screen);
1938    return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
1939 }
1940
1941 /* caller must lock prog->libs->lock */
1942 struct zink_gfx_library_key *
1943 zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
1944 {
1945    struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
1946    if (!gkey) {
1947       mesa_loge("ZINK: failed to allocate gkey!");
1948       return NULL;
1949    }
1950
1951    gkey->optimal_key = state->optimal_key;
1952    assert(gkey->optimal_key);
1953    memcpy(gkey->modules, prog->modules, sizeof(gkey->modules));
1954    gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
1955    _mesa_set_add(&prog->libs->libs, gkey);
1956    return gkey;
1957 }
1958
1959 static const char *
1960 print_exe_stages(VkShaderStageFlags stages)
1961 {
1962    if (stages == VK_SHADER_STAGE_VERTEX_BIT)
1963       return "VS";
1964    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
1965       return "VS+GS";
1966    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
1967       return "VS+TCS+TES";
1968    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
1969       return "VS+TCS+TES+GS";
1970    if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1971       return "TCS";
1972    if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1973       return "TES";
1974    if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
1975       return "GS";
1976    if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
1977       return "FS";
1978    if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
1979       return "CS";
1980    unreachable("unhandled combination of stages!");
1981 }
1982
1983 static void
1984 print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline)
1985 {
1986    VkPipelineInfoKHR pinfo = {
1987      VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
1988      NULL,
1989      pipeline
1990    };
1991    unsigned exe_count = 0;
1992    VkPipelineExecutablePropertiesKHR props[10] = {0};
1993    for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
1994       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
1995       props[i].pNext = NULL;
1996    }
1997    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
1998    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
1999    printf("PIPELINE STATISTICS:");
2000    for (unsigned e = 0; e < exe_count; e++) {
2001       VkPipelineExecutableInfoKHR info = {
2002          VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
2003          NULL,
2004          pipeline,
2005          e
2006       };
2007       unsigned count = 0;
2008       printf("\n\t%s (%s): ", print_exe_stages(props[e].stages), props[e].name);
2009       VkPipelineExecutableStatisticKHR *stats = NULL;
2010       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
2011       stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
2012       if (!stats) {
2013          mesa_loge("ZINK: failed to allocate stats!");
2014          return;
2015       }
2016
2017       for (unsigned i = 0; i < count; i++)
2018          stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
2019       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);
2020
2021       for (unsigned i = 0; i < count; i++) {
2022          if (i)
2023             printf(", ");
2024          switch (stats[i].format) {
2025          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
2026             printf("%s: %u", stats[i].name, stats[i].value.b32);
2027             break;
2028          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
2029             printf("%s: %" PRIi64, stats[i].name, stats[i].value.i64);
2030             break;
2031          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
2032             printf("%s: %" PRIu64, stats[i].name, stats[i].value.u64);
2033             break;
2034          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
2035             printf("%s: %g", stats[i].name, stats[i].value.f64);
2036             break;
2037          default:
2038             unreachable("unknown statistic");
2039          }
2040       }
2041    }
2042    printf("\n");
2043 }
2044
2045 static void
2046 precompile_job(void *data, void *gdata, int thread_index)
2047 {
2048    struct zink_screen *screen = gdata;
2049    struct zink_gfx_program *prog = data;
2050
2051    struct zink_gfx_pipeline_state state = {0};
2052    state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
2053    state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
2054    state.optimal_key = state.shader_keys_optimal.key.val;
2055    generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
2056    zink_screen_get_pipeline_cache(screen, &prog->base, true);
2057    simple_mtx_lock(&prog->libs->lock);
2058    zink_create_pipeline_lib(screen, prog, &state);
2059    simple_mtx_unlock(&prog->libs->lock);
2060    zink_screen_update_pipeline_cache(screen, &prog->base, true);
2061 }
2062
2063 static void
2064 precompile_separate_shader_job(void *data, void *gdata, int thread_index)
2065 {
2066    struct zink_screen *screen = gdata;
2067    struct zink_shader *zs = data;
2068
2069    zs->precompile.obj = zink_shader_compile_separate(screen, zs);
2070    if (!screen->info.have_EXT_shader_object) {
2071       VkShaderModule mods[ZINK_GFX_SHADER_COUNT] = {0};
2072       mods[zs->info.stage] = zs->precompile.obj.mod;
2073       zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, mods, zs->precompile.layout);
2074    }
2075 }
2076
2077 static void
2078 zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
2079 {
2080    struct zink_context *ctx = zink_context(pctx);
2081    struct zink_shader **zshaders = (struct zink_shader **)shaders;
2082    if (shaders[MESA_SHADER_COMPUTE])
2083       return;
2084    /* can't precompile fixedfunc */
2085    if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
2086       if (shaders[MESA_SHADER_VERTEX] || shaders[MESA_SHADER_FRAGMENT]) {
2087          struct zink_shader *zs = shaders[MESA_SHADER_VERTEX] ? shaders[MESA_SHADER_VERTEX] : shaders[MESA_SHADER_FRAGMENT];
2088          if (zs->info.separate_shader && !zs->precompile.obj.mod && util_queue_fence_is_signalled(&zs->precompile.fence) &&
2089              zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
2090              /* sample shading can't precompile */
2091              (!shaders[MESA_SHADER_FRAGMENT] || !zs->info.fs.uses_sample_shading))
2092             util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, zs, &zs->precompile.fence, precompile_separate_shader_job, NULL, 0);
2093       }
2094       return;
2095    }
2096    unsigned hash = 0;
2097    unsigned shader_stages = 0;
2098    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
2099       if (zshaders[i]) {
2100          hash ^= zshaders[i]->hash;
2101          shader_stages |= BITFIELD_BIT(i);
2102       }
2103    }
2104    unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
2105    unsigned tess = shader_stages & tess_stages;
2106    /* can't do fixedfunc tes either */
2107    if (tess && !shaders[MESA_SHADER_TESS_EVAL])
2108       return;
2109    struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
2110    simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2111    /* link can be called repeatedly with the same shaders: ignore */
2112    if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
2113       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2114       return;
2115    }
2116    struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3, hash);
2117    u_foreach_bit(i, shader_stages)
2118       assert(prog->shaders[i]);
2119    _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
2120    prog->base.removed = false;
2121    simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2122    if (zink_debug & ZINK_DEBUG_SHADERDB) {
2123       struct zink_screen *screen = zink_screen(pctx->screen);
2124       if (screen->optimal_keys)
2125          generate_gfx_program_modules_optimal(ctx, screen,  prog, &ctx->gfx_pipeline_state);
2126       else
2127          generate_gfx_program_modules(ctx, screen,  prog, &ctx->gfx_pipeline_state);
2128       VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map,  shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
2129       print_pipeline_stats(screen, pipeline);
2130    } else {
2131       util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
2132    }
2133 }
2134
2135 void
2136 zink_program_init(struct zink_context *ctx)
2137 {
2138    ctx->base.create_vs_state = zink_create_cached_shader_state;
2139    ctx->base.bind_vs_state = zink_bind_vs_state;
2140    ctx->base.delete_vs_state = zink_delete_cached_shader_state;
2141
2142    ctx->base.create_fs_state = zink_create_cached_shader_state;
2143    ctx->base.bind_fs_state = zink_bind_fs_state;
2144    ctx->base.delete_fs_state = zink_delete_cached_shader_state;
2145
2146    ctx->base.create_gs_state = zink_create_cached_shader_state;
2147    ctx->base.bind_gs_state = zink_bind_gs_state;
2148    ctx->base.delete_gs_state = zink_delete_cached_shader_state;
2149
2150    ctx->base.create_tcs_state = zink_create_cached_shader_state;
2151    ctx->base.bind_tcs_state = zink_bind_tcs_state;
2152    ctx->base.delete_tcs_state = zink_delete_cached_shader_state;
2153
2154    ctx->base.create_tes_state = zink_create_cached_shader_state;
2155    ctx->base.bind_tes_state = zink_bind_tes_state;
2156    ctx->base.delete_tes_state = zink_delete_cached_shader_state;
2157
2158    ctx->base.create_compute_state = zink_create_cs_state;
2159    ctx->base.bind_compute_state = zink_bind_cs_state;
2160    ctx->base.delete_compute_state = zink_delete_cs_shader_state;
2161
2162    if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
2163       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
2164    else
2165       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
2166    if (zink_screen(ctx->base.screen)->have_full_ds3)
2167       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
2168    else
2169       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
2170    /* validate struct packing */
2171    STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
2172    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
2173                  offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
2174    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
2175                  offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
2176    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
2177                  offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));
2178
2179    STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
2180
2181    if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library || zink_debug & ZINK_DEBUG_SHADERDB)
2182       ctx->base.link_shader = zink_link_gfx_shader;
2183 }
2184
2185 bool
2186 zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
2187 {
2188    bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
2189    bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
2190    ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
2191    if (!changed)
2192       return false;
2193    if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
2194       ctx->gfx_pipeline_state.dirty |= true;
2195    ctx->rasterizer_discard_changed = true;
2196    return true;
2197 }
2198
2199 void
2200 zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
2201                            struct util_queue_fence *fence,
2202                            pipe_driver_thread_func execute,
2203                            pipe_driver_thread_func cleanup,
2204                            const size_t job_size)
2205 {
2206    struct zink_screen *screen = zink_screen(pscreen);
2207    util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
2208 }
2209
2210 static bool
2211 has_edge_flags(struct zink_context *ctx)
2212 {
2213    switch(ctx->gfx_pipeline_state.gfx_prim_mode) {
2214    case PIPE_PRIM_POINTS:
2215    case PIPE_PRIM_LINE_STRIP:
2216    case PIPE_PRIM_LINE_STRIP_ADJACENCY:
2217    case PIPE_PRIM_LINES:
2218    case PIPE_PRIM_LINE_LOOP:
2219    case PIPE_PRIM_LINES_ADJACENCY:
2220    case PIPE_PRIM_TRIANGLE_STRIP:
2221    case PIPE_PRIM_TRIANGLE_FAN:
2222    case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
2223    case PIPE_PRIM_QUAD_STRIP:
2224    case PIPE_PRIM_PATCHES:
2225       return false;
2226    case PIPE_PRIM_TRIANGLES:
2227    case PIPE_PRIM_TRIANGLES_ADJACENCY:
2228    case PIPE_PRIM_QUADS:
2229    case PIPE_PRIM_POLYGON:
2230    case PIPE_PRIM_MAX:
2231    default:
2232       break;
2233    }
2234    return ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_LINES &&
2235           ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags;
2236 }
2237
2238 static enum zink_rast_prim
2239 zink_rast_prim_for_pipe(enum pipe_prim_type prim)
2240 {
2241    switch (prim) {
2242    case PIPE_PRIM_POINTS:
2243       return ZINK_PRIM_POINTS;
2244    case PIPE_PRIM_LINES:
2245       return ZINK_PRIM_LINES;
2246    case PIPE_PRIM_TRIANGLES:
2247    default:
2248       return ZINK_PRIM_TRIANGLES;
2249    }
2250 }
2251
2252 static enum pipe_prim_type
2253 zink_tess_prim_type(struct zink_shader *tess)
2254 {
2255    if (tess->info.tess.point_mode)
2256       return PIPE_PRIM_POINTS;
2257    else {
2258       switch (tess->info.tess._primitive_mode) {
2259       case TESS_PRIMITIVE_ISOLINES:
2260          return PIPE_PRIM_LINES;
2261       case TESS_PRIMITIVE_TRIANGLES:
2262       case TESS_PRIMITIVE_QUADS:
2263          return PIPE_PRIM_TRIANGLES;
2264       default:
2265          return PIPE_PRIM_MAX;
2266       }
2267    }
2268 }
2269
2270 static inline void
2271 zink_add_inline_uniform(nir_shader *shader, int offset)
2272 {
2273    shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset;
2274    ++shader->info.num_inlinable_uniforms;
2275 }
2276
2277 static unsigned
2278 encode_lower_pv_mode(enum pipe_prim_type prim_type)
2279 {
2280    switch (prim_type) {
2281    case PIPE_PRIM_TRIANGLE_STRIP:
2282    case PIPE_PRIM_QUAD_STRIP:
2283       return ZINK_PVE_PRIMITIVE_TRISTRIP;
2284    case PIPE_PRIM_TRIANGLE_FAN:
2285       return ZINK_PVE_PRIMITIVE_FAN;
2286    default:
2287       return ZINK_PVE_PRIMITIVE_SIMPLE;
2288    }
2289 }
2290
2291 void
2292 zink_set_primitive_emulation_keys(struct zink_context *ctx)
2293 {
2294    struct zink_screen *screen = zink_screen(ctx->base.screen);
2295    bool lower_line_stipple = false, lower_line_smooth = false;
2296    unsigned lower_pv_mode = 0;
2297    if (!screen->optimal_keys) {
2298       lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_LINES &&
2299                                 screen->driver_workarounds.no_linestipple &&
2300                                 ctx->rast_state->base.line_stipple_enable &&
2301                                 !ctx->num_so_targets;
2302
2303       bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_POINTS &&
2304                                 screen->driconf.emulate_point_smooth &&
2305                                 ctx->rast_state->base.point_smooth;
2306       if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
2307          assert(zink_get_gs_key(ctx)->lower_line_stipple ==
2308                 zink_get_fs_key(ctx)->lower_line_stipple);
2309          zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
2310          zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
2311       }
2312
2313       lower_line_smooth = screen->driver_workarounds.no_linesmooth &&
2314                                ctx->rast_state->base.line_smooth &&
2315                                !ctx->num_so_targets;
2316
2317       if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
2318          assert(zink_get_gs_key(ctx)->lower_line_smooth ==
2319                 zink_get_fs_key(ctx)->lower_line_smooth);
2320          zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
2321          zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
2322       }
2323
2324       if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) {
2325          zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth;
2326       }
2327
2328       lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last &&
2329                       !screen->info.have_EXT_provoking_vertex;
2330       if (lower_pv_mode)
2331          lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode);
2332
2333       if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode)
2334          zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode;
2335    }
2336
2337    bool lower_edge_flags = has_edge_flags(ctx);
2338
2339    bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == PIPE_PRIM_QUADS;
2340
2341    bool lower_filled_quad =  lower_quad_prim &&
2342       ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_TRIANGLES;
2343
2344    if (lower_line_stipple || lower_line_smooth ||
2345        lower_edge_flags || lower_quad_prim ||
2346        lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) {
2347       enum pipe_shader_type prev_vertex_stage =
2348          ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
2349             MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2350       enum zink_rast_prim zink_prim_type =
2351          zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim);
2352
2353       //when using transform feedback primitives must be tessellated
2354       lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings;
2355
2356       if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] ||
2357           (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {
2358
2359          if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) {
2360             nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]);
2361             nir_shader *nir;
2362             if (lower_filled_quad) {
2363                nir = zink_create_quads_emulation_gs(
2364                   &screen->nir_options,
2365                   prev_stage);
2366             } else {
2367                enum pipe_prim_type prim = ctx->gfx_pipeline_state.gfx_prim_mode;
2368                if (prev_vertex_stage == MESA_SHADER_TESS_EVAL)
2369                   prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]);
2370                nir = nir_create_passthrough_gs(
2371                   &screen->nir_options,
2372                   prev_stage,
2373                   prim,
2374                   lower_edge_flags,
2375                   lower_line_stipple || lower_quad_prim);
2376             }
2377             zink_lower_system_values_to_inlined_uniforms(nir);
2378
2379             zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
2380             zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
2381             ralloc_free(prev_stage);
2382             struct zink_shader *shader = zink_shader_create(screen, nir, &ctx->gfx_stages[prev_vertex_stage]->sinfo.so_info);
2383             shader->needs_inlining = true;
2384             ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader;
2385             shader->non_fs.is_generated = true;
2386             shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage];
2387             shader->can_inline = true;
2388             shader->sinfo.so_info = ctx->gfx_stages[prev_vertex_stage]->sinfo.so_info;
2389          }
2390
2391          ctx->base.bind_gs_state(&ctx->base,
2392                                  ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]);
2393          ctx->is_generated_gs_bound = true;
2394       }
2395
2396       ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 2,
2397                                         (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags,
2398                                                       ctx->gfx_pipeline_state.dyn_state3.pv_last});
2399    } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
2400               ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
2401          ctx->base.bind_gs_state(&ctx->base, NULL);
2402 }
2403
2404 void
2405 zink_update_fs_key_single_sample(struct zink_context *ctx)
2406 {
2407    bool single_sample = ctx->rast_state && !ctx->rast_state->base.multisample &&
2408                         ctx->gfx_pipeline_state.rast_samples != 0;
2409    if (zink_get_fs_base_key(ctx)->single_sample != single_sample)
2410       zink_set_fs_base_key(ctx)->single_sample = single_sample;
2411 }