src/gallium/drivers/zink/zink_program.c
/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_program.h"

#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_helpers.h"
#include "zink_pipeline.h"
#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_inlines.h"

#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "nir_serialize.h"
#include "nir/nir_draw_helpers.h"

/* for pipeline cache */
#define XXH_INLINE_ALL
#include "util/xxhash.h"

static void
precompile_job(void *data, void *gdata, int thread_index);
struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);

void
debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
{
   sprintf(buf, "zink_gfx_program");
}

void
debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
{
   sprintf(buf, "zink_compute_program");
}

ALWAYS_INLINE static bool
shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
{
   if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask)
      return false;
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
          (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
                                    key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
}

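/* Compare a cached module's stored key blob against the current shader key.
 * The blob is laid out as [base key][nonseamless cube mask][inlined uniform
 * values], with the optional trailing segments present only when in use, so
 * their sizes are recomputed from the module's flags rather than stored.
 */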
ALWAYS_INLINE static bool
shader_key_matches(const struct zink_shader_module *zm,
                   const struct zink_shader_key *key, unsigned num_uniforms,
                   bool has_inline, bool has_nonseamless)
{
   const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
   if (has_inline) {
      if (zm->num_uniforms != num_uniforms ||
          (num_uniforms &&
           memcmp(zm->key + zm->key_size + nonseamless_size,
                  key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
         return false;
   }
   if (!has_nonseamless) {
      if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
          (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
         return false;
   }
   return !memcmp(zm->key, key, zm->key_size);
}

static uint32_t
shader_module_hash(const struct zink_shader_module *zm)
{
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
   return _mesa_hash_data(zm->key, key_size);
}

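/* Determine which optional key segments the next module variant needs:
 * inlined uniform data only when the shader can inline and the variant cap
 * hasn't been reached, and the nonseamless mask only when
 * EXT_non_seamless_cube_map isn't available.
 */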
ALWAYS_INLINE static void
gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
                          struct zink_shader *zs, struct zink_gfx_program *prog,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless, //is nonseamless ext present?
                          unsigned *inline_size, unsigned *nonseamless_size)
{
   gl_shader_stage stage = zs->nir->info.stage;
   struct zink_shader_key *key = &state->shader_keys.key[stage];
   if (has_inline && ctx && zs->nir->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
      if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
         *inline_size = zs->nir->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (!has_nonseamless && key->base.nonseamless_cube_mask)
      *nonseamless_size = sizeof(uint32_t);
}

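/* Compile a new module variant for this stage and add it to the program's
 * shader cache; the key blob described above is copied into the module's
 * trailing storage so later lookups can memcmp against it.
 */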
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                               struct zink_shader *zs, struct zink_gfx_program *prog,
                               gl_shader_stage stage,
                               struct zink_gfx_pipeline_state *state,
                               unsigned inline_size, unsigned nonseamless_size,
                               bool has_inline, //is inlining enabled?
                               bool has_nonseamless) //is nonseamless ext present?
{
   VkShaderModule mod;
   struct zink_shader_module *zm;
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle;
   zm = malloc(sizeof(struct zink_shader_module) + key->size +
               (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
               (shadow_needs_shader_swizzle ? sizeof(struct zink_fs_shadow_key) : 0));
   if (!zm) {
      return NULL;
   }
   unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx); //TODO async
      mod = zink_shader_tcs_compile(screen, zs, patch_vertices);
   } else {
      mod = zink_shader_compile(screen, zs, prog->nir[stage], key, &ctx->di.shadow);
   }
   if (!mod) {
      FREE(zm);
      return NULL;
   }
   zm->shader = mod;
   zm->num_uniforms = inline_size;
   if (!is_nongenerated_tcs) {
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
   } else {
      zm->key_size = 0;
      memset(zm->key, 0, key->size);
   }
   if (!has_nonseamless && nonseamless_size) {
      /* nonseamless mask gets added to base key if it exists */
      memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
   }
   zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
   if (inline_size)
      memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
      zm->hash = patch_vertices;
   else
      zm->hash = shader_module_hash(zm);
   if (unlikely(shadow_needs_shader_swizzle)) {
      memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
      zm->hash ^= _mesa_hash_data(&ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
   }
   zm->default_variant = !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   if (inline_size)
      prog->inlined_variant_count[stage]++;
   util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
   return zm;
}

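/* Search the per-stage cache for a module matching the current key. A hit is
 * swapped to the front of the array so the most recently used variant is
 * found first next time.
 */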
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                            struct zink_shader *zs, struct zink_gfx_program *prog,
                            gl_shader_stage stage,
                            struct zink_gfx_pipeline_state *state,
                            unsigned inline_size, unsigned nonseamless_size,
                            bool has_inline, //is inlining enabled?
                            bool has_nonseamless) //is nonseamless ext present?
{
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle);

   struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
            continue;
      } else {
         if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
            continue;
         if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
                       &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

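/* "Optimal key" variants: here the entire per-stage key fits in a uint16_t
 * (8 bits for non-fragment stages), so the sanitized key bits are stored
 * directly instead of the variable-size blob used above.
 */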
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                       struct zink_shader *zs, struct zink_gfx_program *prog,
                                       gl_shader_stage stage,
                                       struct zink_gfx_pipeline_state *state)
{
   VkShaderModule mod;
   struct zink_shader_module *zm;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   bool shadow_needs_shader_swizzle = false;
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&state->shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   size_t key_size = sizeof(uint16_t);
   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_fs_shadow_key) : 0));
   if (!zm) {
      return NULL;
   }
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx); //TODO async
      struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
      mod = zink_shader_tcs_compile(screen, zs, tcs->patch_vertices);
   } else {
      mod = zink_shader_compile(screen, zs, prog->nir[stage], (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.shadow : NULL);
   }
   if (!mod) {
      FREE(zm);
      return NULL;
   }
   zm->shader = mod;
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   if (key && !is_nongenerated_tcs) {
      zm->key_size = key_size;
      uint16_t *data = (uint16_t*)zm->key;
      /* sanitize actual key bits */
      *data = (*key) & mask;
      if (unlikely(shadow_needs_shader_swizzle))
         memcpy(&data[1], &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
   }
   zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
   return zm;
}

ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                    struct zink_shader *zs, struct zink_gfx_program *prog,
                                    gl_shader_stage stage,
                                    struct zink_gfx_pipeline_state *state)
{
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   bool shadow_needs_shader_swizzle = false;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         /* always match */
      } else if (key) {
         uint16_t val = (*key) & mask;
         /* no key is bigger than uint16_t */
         if (memcmp(iter->key, &val, sizeof(uint16_t)))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

static void
zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
{
   VKSCR(DestroyShaderModule)(screen->dev, zm->shader, NULL);
   free(zm);
}

static void
destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
{
   while (util_dynarray_contains(sc, void*)) {
      struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
      zink_destroy_shader_module(screen, zm);
   }
}

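/* Refresh the module for every stage in `mask`, reusing cached variants when
 * the keys still match; the program's variant hash is maintained by XORing
 * out the old per-stage module hash and XORing in the new one.
 */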
ALWAYS_INLINE static void
update_gfx_shader_modules(struct zink_context *ctx,
                          struct zink_screen *screen,
                          struct zink_gfx_program *prog, uint32_t mask,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless) //is nonseamless ext present?
{
   bool hash_changed = false;
   bool default_variants = true;
   assert(prog->modules[MESA_SHADER_VERTEX]);
   uint32_t variant_hash = prog->last_variant_hash;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(mask & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                  inline_size, nonseamless_size, has_inline, has_nonseamless);
      if (!zm)
         zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                             inline_size, nonseamless_size, has_inline, has_nonseamless);
      state->modules[i] = zm->shader;
      if (prog->modules[i] == zm->shader)
         continue;
      variant_hash ^= prog->module_hash[i];
      hash_changed = true;
      default_variants &= zm->default_variant;
      prog->modules[i] = zm->shader;
      prog->module_hash[i] = zm->hash;
      if (has_inline) {
         if (zm->num_uniforms)
            prog->inline_variants |= BITFIELD_BIT(i);
         else
            prog->inline_variants &= ~BITFIELD_BIT(i);
      }
      variant_hash ^= prog->module_hash[i];
   }

   if (hash_changed && state) {
      if (default_variants)
         prog->last_variant_hash = prog->default_variant_hash;
      else
         prog->last_variant_hash = variant_hash;

      state->modules_changed = true;
   }
}

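/* First-time module generation for a newly created program: every stage
 * present gets a module compiled up front and the combined variant hash is
 * seeded from the per-stage module hashes.
 */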
static void
generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->modules[MESA_SHADER_VERTEX]);
   uint32_t variant_hash = 0;
   bool default_variants = true;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
                                screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
                                &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                     inline_size, nonseamless_size,
                                                                     screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
      state->modules[i] = zm->shader;
      prog->modules[i] = zm->shader;
      prog->module_hash[i] = zm->hash;
      if (zm->num_uniforms)
         prog->inline_variants |= BITFIELD_BIT(i);
      default_variants &= zm->default_variant;
      variant_hash ^= prog->module_hash[i];
   }

   p_atomic_dec(&prog->base.reference.count);
   state->modules_changed = true;

   prog->last_variant_hash = variant_hash;
   if (default_variants)
      prog->default_variant_hash = prog->last_variant_hash;
}

static void
generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->modules[MESA_SHADER_VERTEX]);
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
      prog->modules[i] = zm->shader;
   }

   p_atomic_dec(&prog->base.reference.count);
   state->modules_changed = true;
   prog->last_variant_hash = state->shader_keys_optimal.key.val;
}

static uint32_t
hash_pipeline_lib_generated_tcs(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   return gkey->optimal_key;
}

static bool
equals_pipeline_lib_generated_tcs(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(uint32_t));
}

static uint32_t
hash_pipeline_lib(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   /* remove generated tcs bits */
   return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
}

static bool
equals_pipeline_lib(const void *a, const void *b)
{
   const struct zink_gfx_library_key *ak = a;
   const struct zink_gfx_library_key *bk = b;
   /* remove generated tcs bits */
   uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
   uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
   return val_a == val_b;
}

uint32_t
hash_gfx_input_dynamic(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   return ikey->idx;
}

static bool
equals_gfx_input_dynamic(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   return ikey_a->idx == ikey_b->idx;
}

uint32_t
hash_gfx_input(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   if (ikey->uses_dynamic_stride)
      return ikey->input;
   return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
}

static bool
equals_gfx_input(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   if (ikey_a->uses_dynamic_stride)
      return ikey_a->element_state == ikey_b->element_state &&
             !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
   return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
}

uint32_t
hash_gfx_output_ds3(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, sizeof(uint32_t));
}

static bool
equals_gfx_output_ds3(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, sizeof(uint32_t));
}

uint32_t
hash_gfx_output(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
}

static bool
equals_gfx_output(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
}

ALWAYS_INLINE static void
update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driconf.inline_uniforms)
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                true, has_nonseamless);
   else
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                false, has_nonseamless);
}

static void
update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_non_seamless_cube_map)
      update_gfx_program_nonseamless(ctx, prog, true);
   else
      update_gfx_program_nonseamless(ctx, prog, false);
}

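/* Per-draw gfx program resolution: look up the bound shader combination in
 * the context's program cache, creating and initializing a new program on a
 * miss, then XOR the active program's variant hash into the pipeline state's
 * final hash.
 */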
void
zink_gfx_program_update(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      gl_shader_stage pstage = ctx->last_vertex_stage->nir->info.stage;
      ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
            if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
               ctx->gfx_pipeline_state.modules[i] = prog->modules[i];
         }
         /* ensure variants are always updated if keys have changed since last use */
         ctx->dirty_gfx_stages |= prog->stages_present;
         update_gfx_program(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
         zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         prog->base.removed = false;
         generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(&ctx->batch, &prog->base);
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
}

ALWAYS_INLINE static bool
update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_graphics_pipeline_library)
      util_queue_fence_wait(&prog->base.cache_fence);
   struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
   if (!zm)
      zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);

   bool changed = prog->modules[pstage] != zm->shader;
   prog->modules[pstage] = zm->shader;
   return changed;
}

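/* Fast path for optimal keys: compare the packed key bits against the ones
 * cached in prog->last_variant_hash and re-resolve only the stages whose bits
 * changed; shadow swizzle state is rechecked separately since it isn't part
 * of the packed key.
 */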
static void
update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   const union zink_shader_key_optimal *optimal_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_bits != optimal_key->vs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
   if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
       /* always recheck shadow swizzles since they aren't directly part of the key */
       unlikely(shadow_needs_shader_swizzle)) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
      ctx->gfx_pipeline_state.modules_changed |= changed;
      if (unlikely(shadow_needs_shader_swizzle)) {
         struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
         /* the shadow key is stored immediately after the uint16_t key bits:
          * parenthesize so the byte offset is applied before the cast
          */
         ctx->gfx_pipeline_state.shadow = (struct zink_fs_shadow_key*)(pzm[0]->key + sizeof(uint16_t));
      }
   }
   if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
       ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   prog->last_variant_hash = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
}

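/* Optimal-key analog of zink_gfx_program_update. Separable (fast-linked)
 * programs can't handle shader variants, so whenever a non-default key is
 * needed this waits on the background job and swaps in the full program
 * built by create_linked_separable_job.
 */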
void
zink_gfx_program_update_optimal(struct zink_context *ctx)
{
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;
      ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);

      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         if (prog->is_separable) {
            /* shader variants can't be handled by separable programs: sync and compile */
            if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))
               util_queue_fence_wait(&prog->base.cache_fence);
            /* If the optimized linked pipeline is done compiling, swap it into place. */
            if (util_queue_fence_is_signalled(&prog->base.cache_fence)) {
               struct zink_gfx_program *real = prog->full_prog;
               entry->data = real;
               entry->key = real->shaders;
               real->base.removed = false;
               prog->full_prog = NULL;
               prog->base.removed = true;
               zink_gfx_program_reference(zink_screen(ctx->base.screen), &prog, NULL);
               prog = real;
            }
         }
         update_gfx_program_optimal(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
         prog->base.removed = false;
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         if (!prog->is_separable) {
            zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
            generate_gfx_program_modules_optimal(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
         }
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(&ctx->batch, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (ctx->curr_program->is_separable) {
         struct zink_gfx_program *prog = ctx->curr_program;
         if (prog->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key)) {
            util_queue_fence_wait(&prog->base.cache_fence);
            /* shader variants can't be handled by separable programs: sync and compile */
            struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
            const uint32_t hash = ctx->gfx_hash;
            simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
            struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
            struct zink_gfx_program *real = prog->full_prog;
            entry->data = real;
            entry->key = real->shaders;
            real->base.removed = false;
            prog->full_prog = NULL;
            prog->base.removed = true;
            zink_gfx_program_reference(zink_screen(ctx->base.screen), &prog, NULL);
            ctx->curr_program = real;
            simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
         }
      }
      update_gfx_program_optimal(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
   ctx->gfx_dirty = false;
   ctx->last_vertex_stage_dirty = false;
}

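/* Background job that compiles the optimized pipeline for a cache entry; on
 * success the previous pipeline is retained as the unoptimized fallback and
 * the new one is swapped into place.
 */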
static void
optimized_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;
   VkPipeline pipeline;
   if (pc_entry->gkey)
      pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->ikey->pipeline, &pc_entry->gkey->pipeline, 1, pc_entry->okey->pipeline, true);
   else
      pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
   if (pipeline) {
      pc_entry->unoptimized_pipeline = pc_entry->pipeline;
      pc_entry->pipeline = pipeline;
   }
}

void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
{
   util_queue_add_job(&zink_screen(ctx->base.screen)->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
}

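/* Compute analog of the gfx module update: a variant is only needed for
 * inlined uniforms, the nonseamless-cube mask, or robust access lowering;
 * otherwise the precompiled default module (comp->module) is used.
 */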
static void
update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_shader *zs = comp->shader;
   VkShaderModule mod;
   struct zink_shader_module *zm = NULL;
   unsigned inline_size = 0, nonseamless_size = 0;
   struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
   ASSERTED bool check_robustness = screen->driver_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
   assert(zink_cs_key(key)->robust_access == check_robustness);

   if (ctx && zs->nir->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
      if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
         inline_size = zs->nir->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (key->base.nonseamless_cube_mask)
      nonseamless_size = sizeof(uint32_t);

   if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access) {
      struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
      unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
      struct zink_shader_module **pzm = shader_cache->data;
      for (unsigned i = 0; i < count; i++) {
         struct zink_shader_module *iter = pzm[i];
         if (!shader_key_matches(iter, key, inline_size,
                                 screen->driconf.inline_uniforms,
                                 screen->info.have_EXT_non_seamless_cube_map))
            continue;
         if (i > 0) {
            struct zink_shader_module *zero = pzm[0];
            pzm[0] = iter;
            pzm[i] = zero;
         }
         zm = iter;
      }
   } else {
      zm = comp->module;
   }

   if (!zm) {
      zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t));
      if (!zm) {
         return;
      }
      mod = zink_shader_compile(screen, zs, comp->shader->nir, key, NULL);
      if (!mod) {
         FREE(zm);
         return;
      }
      zm->shader = mod;
      zm->num_uniforms = inline_size;
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
      zm->has_nonseamless = !!nonseamless_size;
      assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access);
      if (nonseamless_size)
         memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
      if (inline_size)
         memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
      zm->hash = shader_module_hash(zm);
      zm->default_variant = false;
      if (inline_size)
         comp->inlined_variant_count++;

      /* this is otherwise the default variant, which is stored as comp->module */
      if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access)
         util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
   }
   if (comp->curr == zm)
      return;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   comp->curr = zm;
   ctx->compute_pipeline_state.module_hash = zm->hash;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   ctx->compute_pipeline_state.module_changed = true;
}

void
zink_update_compute_program(struct zink_context *ctx)
{
   util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
   update_cs_shader_module(ctx, ctx->curr_compute);
}

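/* Common pipeline layout creation; graphics layouts always carry a single
 * push constant range for zink's internal gfx push constants.
 */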
VkPipelineLayout
zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
{
   VkPipelineLayoutCreateInfo plci = {0};
   plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
   plci.flags = flags;

   plci.pSetLayouts = dsl;
   plci.setLayoutCount = num_dsl;

   VkPushConstantRange pcr;
   if (!is_compute) {
      pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
      pcr.offset = 0;
      pcr.size = sizeof(struct zink_gfx_push_constant);
      plci.pushConstantRangeCount = 1;
      plci.pPushConstantRanges = &pcr;
   }

   VkPipelineLayout layout;
   VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
   if (result != VK_SUCCESS) {
      mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
      return VK_NULL_HANDLE;
   }

   return layout;
}

static void *
create_program(struct zink_context *ctx, bool is_compute)
{
   struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
   if (!pg)
      return NULL;

   pipe_reference_init(&pg->reference, 1);
   util_queue_fence_init(&pg->cache_fence);
   pg->is_compute = is_compute;
   pg->ctx = ctx;
   return (void*)pg;
}

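/* Walk the stages in pipeline order, pairing each producer with the next
 * active consumer, and run compiler IO assignment on per-program clones of
 * the shaders so the originals stay untouched.
 */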
static void
assign_io(struct zink_screen *screen,
          struct zink_gfx_program *prog,
          struct zink_shader *stages[ZINK_GFX_SHADER_COUNT])
{
   struct zink_shader *shaders[MESA_SHADER_STAGES];

   /* build array in pipeline order */
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
      shaders[i] = stages[i];

   for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
      nir_shader *producer = shaders[i]->nir;
      for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
         struct zink_shader *consumer = shaders[j];
         if (!consumer)
            continue;
         if (!prog->nir[producer->info.stage])
            prog->nir[producer->info.stage] = nir_shader_clone(prog, producer);
         if (!prog->nir[j])
            prog->nir[j] = nir_shader_clone(prog, consumer->nir);
         zink_compiler_assign_io(screen, prog->nir[producer->info.stage], prog->nir[j]);
         i = j;
         break;
      }
   }
}

void
zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
{
   if (!p_atomic_dec_zero(&libs->refcount))
      return;

   simple_mtx_destroy(&libs->lock);
   set_foreach_remove(&libs->libs, he) {
      struct zink_gfx_library_key *gkey = (void*)he->key;
      VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
      FREE(gkey);
   }
   ralloc_free(libs);
}

static struct zink_gfx_lib_cache *
create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
{
   struct zink_gfx_lib_cache *libs = rzalloc(NULL, struct zink_gfx_lib_cache);
   simple_mtx_init(&libs->lock, mtx_plain);
   if (generated_tcs)
      _mesa_set_init(&libs->libs, libs, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
   else
      _mesa_set_init(&libs->libs, libs, hash_pipeline_lib, equals_pipeline_lib);
   return libs;
}

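/* Pipeline library caches are shared between programs built from the same
 * shader set: find one in the per-screen table keyed by the program's gfx
 * hash, or create it and give each participating shader a reference.
 */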
static struct zink_gfx_lib_cache *
find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
{
   unsigned stages_present = prog->stages_present;
   bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
   if (generated_tcs)
      stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   unsigned idx = zink_program_cache_stages(stages_present);
   struct set *ht = &screen->pipeline_libs[idx];
   const uint32_t hash = prog->gfx_hash;

   simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
   bool found = false;
   struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
   struct zink_gfx_lib_cache *libs;
   if (found) {
      libs = (void*)entry->key;
   } else {
      libs = create_lib_cache(prog, generated_tcs);
      memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
      entry->key = libs;
      unsigned refs = 0;
      for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
         if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
            simple_mtx_lock(&prog->shaders[i]->lock);
            util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
            simple_mtx_unlock(&prog->shaders[i]->lock);
            refs++;
         }
      }
      p_atomic_set(&libs->refcount, refs);
   }
   simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
   return libs;
}

struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
                        struct zink_shader **stages,
                        unsigned vertices_per_patch,
                        uint32_t gfx_hash)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_gfx_program *prog = create_program(ctx, false);
   if (!prog)
      goto fail;

   prog->ctx = ctx;
   prog->gfx_hash = gfx_hash;
   prog->base.removed = true;

   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      util_dynarray_init(&prog->shader_cache[i][0][0], prog);
      util_dynarray_init(&prog->shader_cache[i][0][1], prog);
      util_dynarray_init(&prog->shader_cache[i][1][0], prog);
      util_dynarray_init(&prog->shader_cache[i][1][1], prog);
      if (stages[i]) {
         prog->shaders[i] = stages[i];
         prog->stages_present |= BITFIELD_BIT(i);
      }
   }
   if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
      prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs =
      prog->shaders[MESA_SHADER_TESS_CTRL] =
        zink_shader_tcs_create(screen, stages[MESA_SHADER_VERTEX], vertices_per_patch);
      prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   }
   prog->stages_remaining = prog->stages_present;

   assign_io(screen, prog, prog->shaders);

   if (stages[MESA_SHADER_GEOMETRY])
      prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
   else if (stages[MESA_SHADER_TESS_EVAL])
      prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
   else
      prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];

   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
         /* only need first 3/4 for point/line/tri/patch */
         if (screen->info.have_EXT_extended_dynamic_state &&
             i == (prog->last_vertex_stage->nir->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
            break;
      }
   }

   if (screen->optimal_keys)
      prog->libs = find_or_create_lib_cache(screen, prog);

   struct mesa_sha1 sctx;
   _mesa_sha1_init(&sctx);
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         simple_mtx_lock(&prog->shaders[i]->lock);
         _mesa_set_add(prog->shaders[i]->programs, prog);
         simple_mtx_unlock(&prog->shaders[i]->lock);
         zink_gfx_program_reference(screen, NULL, prog);
         _mesa_sha1_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
      }
   }
   _mesa_sha1_final(&sctx, prog->base.sha1);

   if (!zink_descriptor_program_init(ctx, &prog->base))
      goto fail;

   return prog;

fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

/* Creates a replacement, optimized zink_gfx_program for this set of separate
 * shaders, which will be swapped in to replace the fast-linked separable
 * program once it's done compiling.
 */
static void
create_linked_separable_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_program *prog = data;
   prog->full_prog = zink_create_gfx_program(prog->ctx, prog->shaders, 0, prog->gfx_hash);
   precompile_job(prog->full_prog, gdata, thread_index);
}

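/* Build a lightweight separable program by fast-linking the VS/FS precompiled
 * pipeline libraries when only default key state is needed, then queue
 * create_linked_separable_job to assemble the real program in the background;
 * anything requiring variants falls back to zink_create_gfx_program.
 */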
struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   unsigned shader_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT);
   /* filter cases that need real pipelines */
   if (ctx->shader_stages != shader_stages ||
       !stages[MESA_SHADER_VERTEX]->precompile.mod || !stages[MESA_SHADER_FRAGMENT]->precompile.mod ||
       /* TODO: maybe try variants? grimace */
       !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
       !zink_can_use_pipeline_libs(ctx))
      return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
   /* ensure async gpl creation is done */
   util_queue_fence_wait(&stages[MESA_SHADER_VERTEX]->precompile.fence);
   util_queue_fence_wait(&stages[MESA_SHADER_FRAGMENT]->precompile.fence);

   struct zink_gfx_program *prog = create_program(ctx, false);
   if (!prog)
      goto fail;

   prog->ctx = ctx;
   prog->is_separable = true;
   prog->gfx_hash = ctx->gfx_hash;

   prog->shaders[MESA_SHADER_VERTEX] = stages[MESA_SHADER_VERTEX];
   prog->stages_remaining = prog->stages_present = shader_stages;
   prog->shaders[MESA_SHADER_FRAGMENT] = stages[MESA_SHADER_FRAGMENT];
   prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
   prog->libs = create_lib_cache(prog, false);
   /* this libs cache is owned by the program */
   p_atomic_set(&prog->libs->refcount, 1);

   unsigned refs = 0;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         simple_mtx_lock(&prog->shaders[i]->lock);
         _mesa_set_add(prog->shaders[i]->programs, prog);
         simple_mtx_unlock(&prog->shaders[i]->lock);
         refs++;
      }
   }
   /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
    * referenced by the draw state and zink_shader_free() can't be called on them while we're in here.
    */
   p_atomic_add(&prog->base.reference.count, refs);

   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
         /* only need first 3/4 for point/line/tri/patch */
         if (screen->info.have_EXT_extended_dynamic_state &&
             i == (prog->last_vertex_stage->nir->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
            break;
      }
   }

   if (prog->shaders[MESA_SHADER_VERTEX]->precompile.dsl) {
      prog->base.dd.binding_usage |= BITFIELD_BIT(0);
      prog->base.dsl[prog->base.num_dsl] = prog->shaders[MESA_SHADER_VERTEX]->precompile.dsl;
      prog->base.num_dsl++;
   }
   if (prog->shaders[MESA_SHADER_FRAGMENT]->precompile.dsl) {
      prog->base.dd.binding_usage |= BITFIELD_BIT(1);
      prog->base.dsl[prog->base.num_dsl] = prog->shaders[MESA_SHADER_FRAGMENT]->precompile.dsl;
      /* guarantee a null dsl if vs doesn't have descriptors */
      prog->base.num_dsl = 2;
   }
   prog->base.dd.bindless = prog->shaders[MESA_SHADER_VERTEX]->bindless | prog->shaders[MESA_SHADER_FRAGMENT]->bindless;
   if (prog->base.dd.bindless) {
      prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
      prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
   }
   prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);

   VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
   prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;

   struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
   gkey->optimal_key = prog->last_variant_hash;
   assert(gkey->optimal_key);
   gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false);
   _mesa_set_add(&prog->libs->libs, gkey);

   util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);

   return prog;
fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

static uint32_t
hash_compute_pipeline_state_local_size(const void *key)
{
   const struct zink_compute_pipeline_state *state = key;
   uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
   hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
   return hash;
}

static uint32_t
hash_compute_pipeline_state(const void *key)
{
   const struct zink_compute_pipeline_state *state = key;
   return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
}

void
zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3])
{
   if (comp->use_local_size) {
      for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
         if (ctx->compute_pipeline_state.local_size[i] != block[i])
            ctx->compute_pipeline_state.dirty = true;
         ctx->compute_pipeline_state.local_size[i] = block[i];
      }
   }
}

static bool
equals_compute_pipeline_state(const void *a, const void *b)
{
   const struct zink_compute_pipeline_state *sa = a;
   const struct zink_compute_pipeline_state *sb = b;
   return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
          sa->module == sb->module;
}

static bool
equals_compute_pipeline_state_local_size(const void *a, const void *b)
{
   const struct zink_compute_pipeline_state *sa = a;
   const struct zink_compute_pipeline_state *sb = b;
   return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
          !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
          sa->module == sb->module;
}

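/* Async compute program setup: compile the default module, hash the
 * serialized NIR for the cache key, initialize descriptors, and precompile a
 * base pipeline when the workgroup size is statically known.
 */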
static void
precompile_compute_job(void *data, void *gdata, int thread_index)
{
   struct zink_compute_program *comp = data;
   struct zink_screen *screen = gdata;

   comp->shader = zink_shader_create(screen, comp->nir, NULL);
   comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
   assert(comp->module);
   comp->module->shader = zink_shader_compile(screen, comp->shader, comp->shader->nir, NULL, NULL);
   assert(comp->module->shader);
   util_dynarray_init(&comp->shader_cache[0], comp);
   util_dynarray_init(&comp->shader_cache[1], comp);

   struct blob blob = {0};
   blob_init(&blob);
   nir_serialize(&blob, comp->shader->nir, true);

   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, blob.data, blob.size);
   _mesa_sha1_final(&sha1_ctx, comp->base.sha1);
   blob_finish(&blob);

   zink_descriptor_program_init(comp->base.ctx, &comp->base);

   zink_screen_get_pipeline_cache(screen, &comp->base, true);
   if (comp->base.can_precompile)
      comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
   if (comp->base_pipeline)
      zink_screen_update_pipeline_cache(screen, &comp->base, true);
}

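/* a compute program "uses local size" when the shader leaves its workgroup
 * size unspecified (all zeroes), in which case the real size only arrives at
 * dispatch and the base pipeline can't be precompiled; precompile is also
 * skipped when a missing feature would force shader variants (seamless
 * cubes, robust image access)
 */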
static struct zink_compute_program *
create_compute_program(struct zink_context *ctx, nir_shader *nir)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_compute_program *comp = create_program(ctx, true);
   if (!comp)
      return NULL;
   comp->nir = nir;

   comp->use_local_size = !(nir->info.workgroup_size[0] ||
                            nir->info.workgroup_size[1] ||
                            nir->info.workgroup_size[2]);
   comp->base.can_precompile = !comp->use_local_size &&
                               (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
                               (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
   _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
                                                       equals_compute_pipeline_state_local_size :
                                                       equals_compute_pipeline_state);
   util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
                      precompile_compute_job, NULL, 0);
   return comp;
}

uint32_t
zink_program_get_descriptor_usage(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type)
{
   struct zink_shader *zs = NULL;
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_FRAGMENT:
      zs = ctx->gfx_stages[stage];
      break;
   case MESA_SHADER_COMPUTE: {
      zs = ctx->curr_compute->shader;
      break;
   }
   default:
      unreachable("unknown shader type");
   }
   if (!zs)
      return 0;
   switch (type) {
   case ZINK_DESCRIPTOR_TYPE_UBO:
      return zs->ubos_used;
   case ZINK_DESCRIPTOR_TYPE_SSBO:
      return zs->ssbos_used;
   case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
      return BITSET_TEST_RANGE(zs->nir->info.textures_used, 0, PIPE_MAX_SAMPLERS - 1);
   case ZINK_DESCRIPTOR_TYPE_IMAGE:
      return BITSET_TEST_RANGE(zs->nir->info.images_used, 0, PIPE_MAX_SAMPLERS - 1);
   default:
      unreachable("unknown descriptor type!");
   }
   return 0;
}

bool
zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
{
   struct zink_shader *zs = NULL;
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_FRAGMENT:
      zs = ctx->gfx_stages[stage];
      break;
   case MESA_SHADER_COMPUTE: {
      zs = ctx->curr_compute->shader;
      break;
   }
   default:
      unreachable("unknown shader type");
   }
   if (!zs)
      return false;
   return zink_shader_descriptor_is_buffer(zs, type, i);
}

static unsigned
get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
{
   switch (type) {
   case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
      return !!zs->has_uniforms;
   case ZINK_DESCRIPTOR_TYPE_UBO:
   case ZINK_DESCRIPTOR_TYPE_SSBO:
      return zs->num_bindings[type];
   default:
      break;
   }
   unsigned num_bindings = 0;
   for (int i = 0; i < zs->num_bindings[type]; i++)
      num_bindings += zs->bindings[type][i].size;
   return num_bindings;
}

unsigned
zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
{
   unsigned num_bindings = 0;
   if (pg->is_compute) {
      struct zink_compute_program *comp = (void*)pg;
      return get_num_bindings(comp->shader, type);
   }
   struct zink_gfx_program *prog = (void*)pg;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (prog->shaders[i])
         num_bindings += get_num_bindings(prog->shaders[i], type);
   }
   return num_bindings;
}

unsigned
zink_program_num_bindings(const struct zink_program *pg)
{
   unsigned num_bindings = 0;
   for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
      num_bindings += zink_program_num_bindings_typed(pg, i);
   return num_bindings;
}

static void
deinit_program(struct zink_screen *screen, struct zink_program *pg)
{
   util_queue_fence_wait(&pg->cache_fence);
   if (pg->layout)
      VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);

   if (pg->pipeline_cache)
      VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
   zink_descriptor_program_deinit(screen, pg);
}

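/* note: with EXT_extended_dynamic_state, pipelines are bucketed by reduced
 * primitive type, so destruction only needs to walk the leading buckets
 * (point/line/tri, plus patch when a tess-eval stage is present without a
 * geometry stage) rather than the full pipeline array
 */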
void
zink_destroy_gfx_program(struct zink_screen *screen,
                         struct zink_gfx_program *prog)
{
   unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
   if (screen->info.have_EXT_extended_dynamic_state) {
      /* only need first 3/4 for point/line/tri/patch */
      if ((prog->stages_present &
          (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
          BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
         max_idx = 4;
      else
         max_idx = 3;
      max_idx++;
   }

   if (prog->is_separable)
      zink_gfx_program_reference(screen, &prog->full_prog, NULL);
   for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
      for (int i = 0; i < max_idx; ++i) {
         hash_table_foreach(&prog->pipelines[r][i], entry) {
            struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;

            util_queue_fence_wait(&pc_entry->fence);
            VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
            VKSCR(DestroyPipeline)(screen->dev, pc_entry->unoptimized_pipeline, NULL);
            free(pc_entry);
         }
      }
   }

   deinit_program(screen, &prog->base);

   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         _mesa_set_remove_key(prog->shaders[i]->programs, prog);
         prog->shaders[i] = NULL;
      }
      if (!prog->is_separable) {
         destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
         destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
         destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
         destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
         ralloc_free(prog->nir[i]);
      }
   }
   if (prog->is_separable)
      zink_gfx_lib_cache_unref(screen, prog->libs);

   ralloc_free(prog);
}

void
zink_destroy_compute_program(struct zink_screen *screen,
                             struct zink_compute_program *comp)
{
   deinit_program(screen, &comp->base);

   assert(comp->shader);
   assert(!comp->shader->spirv);

   _mesa_set_destroy(comp->shader->programs, NULL);
   ralloc_free(comp->shader->nir);
   ralloc_free(comp->shader);

   destroy_shader_cache(screen, &comp->shader_cache[0]);
   destroy_shader_cache(screen, &comp->shader_cache[1]);

   hash_table_foreach(&comp->pipelines, entry) {
      struct compute_pipeline_cache_entry *pc_entry = entry->data;

      VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
      free(pc_entry);
   }
   VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
   VKSCR(DestroyShaderModule)(screen->dev, comp->module->shader, NULL);
   free(comp->module);

   ralloc_free(comp);
}

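/* the precompiled base pipeline is reusable whenever no variant key is in
 * play: fixed workgroup size, no inlined uniforms, no nonseamless-cube
 * lowering
 */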
ALWAYS_INLINE static bool
compute_can_shortcut(const struct zink_compute_program *comp)
{
   return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
}

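/* pipeline lookup uses an incrementally-maintained hash: each component's
 * hash is XORed into final_hash, so updating a component is
 *
 *    state->final_hash ^= old_hash;  // remove the stale value
 *    state->final_hash ^= new_hash;  // mix in the current value
 *
 * which is why the state hash is XORed out below before rehashing
 * (skipped on the first hash, when no pipeline exists yet)
 */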
VkPipeline
zink_get_compute_pipeline(struct zink_screen *screen,
                          struct zink_compute_program *comp,
                          struct zink_compute_pipeline_state *state)
{
   struct hash_entry *entry = NULL;

   if (!state->dirty && !state->module_changed)
      return state->pipeline;
   if (state->dirty) {
      if (state->pipeline) //avoid on first hash
         state->final_hash ^= state->hash;
      if (comp->use_local_size)
         state->hash = hash_compute_pipeline_state_local_size(state);
      else
         state->hash = hash_compute_pipeline_state(state);
      state->dirty = false;
      state->final_hash ^= state->hash;
   }

   util_queue_fence_wait(&comp->base.cache_fence);
   if (comp->base_pipeline && compute_can_shortcut(comp)) {
      state->pipeline = comp->base_pipeline;
      return state->pipeline;
   }
   entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);

   if (!entry) {
      VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);

      if (pipeline == VK_NULL_HANDLE)
         return VK_NULL_HANDLE;

      zink_screen_update_pipeline_cache(screen, &comp->base, false);
      if (compute_can_shortcut(comp)) {
         /* don't add base pipeline to cache */
         state->pipeline = comp->base_pipeline = pipeline;
         return state->pipeline;
      }

      struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
      if (!pc_entry)
         return VK_NULL_HANDLE;

      memcpy(&pc_entry->state, state, sizeof(*state));
      pc_entry->pipeline = pipeline;

      entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
      assert(entry);
   }

   struct compute_pipeline_cache_entry *cache_entry = entry->data;
   state->pipeline = cache_entry->pipeline;
   return state->pipeline;
}

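/* (un)binding a gfx stage XORs the shader's hash out of/into ctx->gfx_hash;
 * a NULL bind also clears the stage's module, drops the current program's
 * variant hash, and unbinds any generated passthrough GS owned by the stage
 */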
static void
bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
{
   if (shader && shader->nir->info.num_inlinable_uniforms)
      ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
   else
      ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);

   if (ctx->gfx_stages[stage]) {
      ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;

      /* unbind the generated GS */
      if (stage != MESA_SHADER_FRAGMENT &&
          ctx->gfx_stages[stage]->non_fs.generated_gs &&
          ctx->gfx_stages[MESA_SHADER_GEOMETRY] ==
          ctx->gfx_stages[stage]->non_fs.generated_gs) {
         assert(stage != MESA_SHADER_GEOMETRY); /* let's not keep recursing! */
         bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
      }
   }
   ctx->gfx_stages[stage] = shader;
   ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
   ctx->gfx_pipeline_state.modules_changed = true;
   if (shader) {
      ctx->shader_stages |= BITFIELD_BIT(stage);
      ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
   } else {
      ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->curr_program = NULL;
      ctx->shader_stages &= ~BITFIELD_BIT(stage);
   }
}

static enum pipe_prim_type
gs_output_to_reduced_prim_type(struct shader_info *info)
{
   switch (info->gs.output_primitive) {
   case SHADER_PRIM_POINTS:
      return PIPE_PRIM_POINTS;

   case SHADER_PRIM_LINES:
   case SHADER_PRIM_LINE_LOOP:
   case SHADER_PRIM_LINE_STRIP:
   case SHADER_PRIM_LINES_ADJACENCY:
   case SHADER_PRIM_LINE_STRIP_ADJACENCY:
      return PIPE_PRIM_LINES;

   case SHADER_PRIM_TRIANGLES:
   case SHADER_PRIM_TRIANGLE_STRIP:
   case SHADER_PRIM_TRIANGLE_FAN:
   case SHADER_PRIM_TRIANGLES_ADJACENCY:
   case SHADER_PRIM_TRIANGLE_STRIP_ADJACENCY:
      return PIPE_PRIM_TRIANGLES;

   default:
      unreachable("unexpected output primitive type");
   }
}

static enum pipe_prim_type
update_rast_prim(struct zink_shader *shader)
{
   struct shader_info *info = &shader->nir->info;
   if (info->stage == MESA_SHADER_GEOMETRY)
      return gs_output_to_reduced_prim_type(info);
   else if (info->stage == MESA_SHADER_TESS_EVAL) {
      if (info->tess.point_mode)
         return PIPE_PRIM_POINTS;
      else {
         switch (info->tess._primitive_mode) {
         case TESS_PRIMITIVE_ISOLINES:
            return PIPE_PRIM_LINES;
         case TESS_PRIMITIVE_TRIANGLES:
         case TESS_PRIMITIVE_QUADS:
            return PIPE_PRIM_TRIANGLES;
         default:
            return PIPE_PRIM_MAX;
         }
      }
   }
   return PIPE_PRIM_MAX;
}

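/* the last vertex stage is GS > TES > VS; when it changes, stale vs_base
 * key data is cleared and the enabled viewport count is recomputed, since
 * the viewport count depends on whether that stage writes gl_ViewportIndex
 */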
static void
bind_last_vertex_stage(struct zink_context *ctx)
{
   gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->nir->info.stage : MESA_SHADER_STAGES;
   if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
   else
      ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
   gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->nir->info.stage : MESA_SHADER_VERTEX;

   /* update rast_prim */
   ctx->gfx_pipeline_state.shader_rast_prim =
      ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
                               PIPE_PRIM_MAX;

   if (old != current) {
      if (!zink_screen(ctx->base.screen)->optimal_keys) {
         if (old != MESA_SHADER_STAGES) {
            memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
            ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
         } else {
            /* always unset vertex shader values when changing to a non-vs last stage */
            memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
         }
      }

      unsigned num_viewports = ctx->vp_state.num_viewports;
      struct zink_screen *screen = zink_screen(ctx->base.screen);
      /* number of enabled viewports is based on whether last vertex stage writes viewport index */
      if (ctx->last_vertex_stage) {
         if (ctx->last_vertex_stage->nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
            ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
         else
            ctx->vp_state.num_viewports = 1;
      } else {
         ctx->vp_state.num_viewports = 1;
      }
      ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
      if (!screen->info.have_EXT_extended_dynamic_state) {
         if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
            ctx->gfx_pipeline_state.dirty = true;
         ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
      }
      ctx->last_vertex_stage_dirty = true;
   }
}

static void
zink_bind_vs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
      return;
   bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
   bind_last_vertex_stage(ctx);
   if (cso) {
      struct zink_shader *zs = cso;
      ctx->shader_reads_drawid = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
      ctx->shader_reads_basevertex = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
   } else {
      ctx->shader_reads_drawid = false;
      ctx->shader_reads_basevertex = false;
   }
}

/* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
 * in GL, samples==1 means ignore gl_SampleMask[];
 * in VK, gl_SampleMask[] is never ignored
 */
void
zink_update_fs_key_samples(struct zink_context *ctx)
{
   if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
      return;
   nir_shader *nir = ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir;
   if (nir->info.outputs_written & (1 << FRAG_RESULT_SAMPLE_MASK)) {
      bool samples = zink_get_fs_base_key(ctx)->samples;
      if (samples != (ctx->fb_state.samples > 1))
         zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
   }
}

static void
zink_bind_fs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
      return;
   unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
   bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
   ctx->fbfetch_outputs = 0;
   if (cso) {
      nir_shader *nir = ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir;
      if (nir->info.fs.uses_fbfetch_output) {
         nir_foreach_shader_out_variable(var, ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir) {
            if (var->data.fb_fetch_output)
               ctx->fbfetch_outputs |= BITFIELD_BIT(var->data.location - FRAG_RESULT_DATA0);
         }
      }
      zink_update_fs_key_samples(ctx);
      if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
         if (ctx->gfx_pipeline_state.rast_attachment_order != nir->info.fs.uses_fbfetch_output)
            ctx->gfx_pipeline_state.dirty = true;
         ctx->gfx_pipeline_state.rast_attachment_order = nir->info.fs.uses_fbfetch_output;
      }
      zink_set_fs_shadow_needs_shader_swizzle_key(ctx, false);
      if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask)
         zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
   }
   zink_update_fbfetch(ctx);
}

static void
zink_bind_gs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
      return;
   bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
   bind_last_vertex_stage(ctx);
}

static void
zink_bind_tcs_state(struct pipe_context *pctx,
                    void *cso)
{
   bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
}

static void
zink_bind_tes_state(struct pipe_context *pctx,
                    void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
      return;
   if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
      if (!cso) {
         /* if unsetting a TES that uses a generated TCS, ensure the TCS is unset */
         if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
            ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
      }
   }
   bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
   bind_last_vertex_stage(ctx);
}

static void *
zink_create_cs_state(struct pipe_context *pctx,
                     const struct pipe_compute_state *shader)
{
   struct nir_shader *nir;
   if (shader->ir_type != PIPE_SHADER_IR_NIR)
      nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
   else
      nir = (struct nir_shader *)shader->prog;

   if (nir->info.uses_bindless)
      zink_descriptors_init_bindless(zink_context(pctx));

   return create_compute_program(zink_context(pctx), nir);
}

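/* compute binding mirrors the gfx-side hash bookkeeping: the outgoing
 * program's module hash is XORed out of final_hash, then the incoming
 * program's hash is XORed in; the module handle is only set here if
 * precompilation already finished, otherwise zink_get_compute_pipeline
 * waits on the cache fence before use
 */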
static void
zink_bind_cs_state(struct pipe_context *pctx,
                   void *cso)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_compute_program *comp = cso;
   if (comp && comp->nir->info.num_inlinable_uniforms)
      ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
   else
      ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);

   if (ctx->curr_compute) {
      zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
      ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
      ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
      ctx->compute_pipeline_state.module_hash = 0;
   }
   ctx->compute_pipeline_state.dirty = true;
   ctx->curr_compute = comp;
   /* the old condition `comp != ctx->curr_compute` was always false here
    * since curr_compute was just assigned; the block above already cleared
    * the module state, so it must be reapplied whenever a program is bound */
   if (comp) {
      ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
      if (util_queue_fence_is_signalled(&comp->base.cache_fence))
         ctx->compute_pipeline_state.module = ctx->curr_compute->curr->shader;
      ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
      if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
         ctx->compute_dirty = true;
   }
   zink_select_launch_grid(ctx);
}

static void
zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
{
   struct zink_compute_program *comp = cso;
   zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
}

void
zink_delete_shader_state(struct pipe_context *pctx, void *cso)
{
   zink_shader_free(zink_screen(pctx->screen), cso);
}

void *
zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
{
   nir_shader *nir;
   if (shader->type != PIPE_SHADER_IR_NIR)
      nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
   else
      nir = (struct nir_shader *)shader->ir.nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
      zink_descriptor_util_init_fbfetch(zink_context(pctx));
   if (nir->info.uses_bindless)
      zink_descriptors_init_bindless(zink_context(pctx));

   return zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output);
}

static void
zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   util_shader_reference(pctx, &screen->shaders, &cso, NULL);
}

static void *
zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
{
   bool cache_hit;
   struct zink_screen *screen = zink_screen(pctx->screen);
   return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
}

/* caller must lock prog->libs->lock */
struct zink_gfx_library_key *
zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
   if (!gkey)
      return NULL;
   gkey->optimal_key = state->optimal_key;
   assert(gkey->optimal_key);
   memcpy(gkey->modules, prog->modules, sizeof(gkey->modules));
   gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
   _mesa_set_add(&prog->libs->libs, gkey);
   return gkey;
}

static const char *
print_exe_stages(VkShaderStageFlags stages)
{
   if (stages == VK_SHADER_STAGE_VERTEX_BIT)
      return "VS";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
      return "VS+GS";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
      return "VS+TCS+TES";
   if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
      return "VS+TCS+TES+GS";
   if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
      return "TCS";
   if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
      return "TES";
   if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
      return "GS";
   if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
      return "FS";
   if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
      return "CS";
   unreachable("unhandled combination of stages!");
}

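/* debug helper for ZINK_DEBUG=shaderdb: dumps per-executable statistics
 * using VK_KHR_pipeline_executable_properties
 */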
static void
print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline)
{
   VkPipelineInfoKHR pinfo = {
     VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
     NULL,
     pipeline
   };
   unsigned exe_count = 0;
   VkPipelineExecutablePropertiesKHR props[10] = {0};
   for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
      props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
      props[i].pNext = NULL;
   }
   VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
   /* the second call writes up to exe_count entries: clamp to the array size */
   if (exe_count > ARRAY_SIZE(props))
      exe_count = ARRAY_SIZE(props);
   VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
   printf("PIPELINE STATISTICS:");
   for (unsigned e = 0; e < exe_count; e++) {
      VkPipelineExecutableInfoKHR info = {
         VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
         NULL,
         pipeline,
         e
      };
      unsigned count = 0;
      printf("\n\t%s (%s): ", print_exe_stages(props[e].stages), props[e].name);
      VkPipelineExecutableStatisticKHR *stats = NULL;
      VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
      stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
      for (unsigned i = 0; i < count; i++)
         stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
      VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);

      for (unsigned i = 0; i < count; i++) {
         if (i)
            printf(", ");
         switch (stats[i].format) {
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
            printf("%s: %u", stats[i].name, stats[i].value.b32);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
            printf("%s: %" PRIi64, stats[i].name, stats[i].value.i64);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
            printf("%s: %" PRIu64, stats[i].name, stats[i].value.u64);
            break;
         case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
            printf("%s: %g", stats[i].name, stats[i].value.f64);
            break;
         default:
            unreachable("unknown statistic");
         }
      }
      free(stats);
   }
   printf("\n");
}

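/* async precompile jobs: full programs get modules compiled with a default
 * optimal key (last_vertex_stage set, 3 patch vertices) and a pipeline
 * library baked; separable shaders get a standalone module and library so
 * they can be linked on demand
 */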
1967
1968 static void
1969 precompile_job(void *data, void *gdata, int thread_index)
1970 {
1971    struct zink_screen *screen = gdata;
1972    struct zink_gfx_program *prog = data;
1973
1974    struct zink_gfx_pipeline_state state = {0};
1975    state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
1976    state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
1977    state.optimal_key = state.shader_keys_optimal.key.val;
1978    generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
1979    zink_screen_get_pipeline_cache(screen, &prog->base, true);
1980    simple_mtx_lock(&prog->libs->lock);
1981    zink_create_pipeline_lib(screen, prog, &state);
1982    simple_mtx_unlock(&prog->libs->lock);
1983    zink_screen_update_pipeline_cache(screen, &prog->base, true);
1984 }
1985
1986 static void
1987 precompile_separate_shader_job(void *data, void *gdata, int thread_index)
1988 {
1989    struct zink_screen *screen = gdata;
1990    struct zink_shader *zs = data;
1991
1992    nir_shader *nir;
1993    zs->precompile.mod = zink_shader_compile_separate(screen, zs, &nir);
1994    zink_descriptor_shader_init(screen, zs);
1995    VkShaderModule mods[ZINK_GFX_SHADER_COUNT] = {0};
1996    mods[nir->info.stage] = zs->precompile.mod;
1997    zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, mods, zs->precompile.layout);
1998 }
1999
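/* pipe_context::link_shader hook: creates (or reuses) the gfx program for a
 * complete shader set at link time so pipelines can be precompiled before
 * the first draw; under ZINK_DEBUG=shaderdb the pipeline is instead compiled
 * synchronously so its statistics can be printed
 */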
static void
zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_shader **zshaders = (struct zink_shader **)shaders;
   if (shaders[MESA_SHADER_COMPUTE])
      return;
   /* can't precompile fixedfunc */
   if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
      if (shaders[MESA_SHADER_VERTEX] || shaders[MESA_SHADER_FRAGMENT]) {
         struct zink_shader *zs = shaders[MESA_SHADER_VERTEX] ? shaders[MESA_SHADER_VERTEX] : shaders[MESA_SHADER_FRAGMENT];
         if (zs->nir->info.separate_shader && !zs->precompile.mod && util_queue_fence_is_signalled(&zs->precompile.fence) &&
             zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
             /* sample shading can't precompile */
             (!shaders[MESA_SHADER_FRAGMENT] || !zs->nir->info.fs.uses_sample_shading))
            util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, zs, &zs->precompile.fence, precompile_separate_shader_job, NULL, 0);
      }
      return;
   }
   unsigned hash = 0;
   unsigned shader_stages = 0;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (zshaders[i]) {
         hash ^= zshaders[i]->hash;
         shader_stages |= BITFIELD_BIT(i);
      }
   }
   unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
   unsigned tess = shader_stages & tess_stages;
   /* can't do fixedfunc tes either */
   if (tess && !shaders[MESA_SHADER_TESS_EVAL])
      return;
   struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
   simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
   /* link can be called repeatedly with the same shaders: ignore */
   if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
      return;
   }
   struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3, hash);
   u_foreach_bit(i, shader_stages)
      assert(prog->shaders[i]);
   _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
   prog->base.removed = false;
   simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
   if (zink_debug & ZINK_DEBUG_SHADERDB) {
      struct zink_screen *screen = zink_screen(pctx->screen);
      if (screen->optimal_keys)
         generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
      else
         generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state);
      VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map, shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
      print_pipeline_stats(screen, pipeline);
   } else {
      util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
   }
}

void
zink_program_init(struct zink_context *ctx)
{
   ctx->base.create_vs_state = zink_create_cached_shader_state;
   ctx->base.bind_vs_state = zink_bind_vs_state;
   ctx->base.delete_vs_state = zink_delete_cached_shader_state;

   ctx->base.create_fs_state = zink_create_cached_shader_state;
   ctx->base.bind_fs_state = zink_bind_fs_state;
   ctx->base.delete_fs_state = zink_delete_cached_shader_state;

   ctx->base.create_gs_state = zink_create_cached_shader_state;
   ctx->base.bind_gs_state = zink_bind_gs_state;
   ctx->base.delete_gs_state = zink_delete_cached_shader_state;

   ctx->base.create_tcs_state = zink_create_cached_shader_state;
   ctx->base.bind_tcs_state = zink_bind_tcs_state;
   ctx->base.delete_tcs_state = zink_delete_cached_shader_state;

   ctx->base.create_tes_state = zink_create_cached_shader_state;
   ctx->base.bind_tes_state = zink_bind_tes_state;
   ctx->base.delete_tes_state = zink_delete_cached_shader_state;

   ctx->base.create_compute_state = zink_create_cs_state;
   ctx->base.bind_compute_state = zink_bind_cs_state;
   ctx->base.delete_compute_state = zink_delete_cs_shader_state;

   if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
      _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
   else
      _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
   if (zink_screen(ctx->base.screen)->have_full_ds3)
      _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
   else
      _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
   /* validate struct packing */
   STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
   STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
                 offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));

   STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));

   if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library || zink_debug & ZINK_DEBUG_SHADERDB)
      ctx->base.link_shader = zink_link_gfx_shader;
}

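/* callers pass disable=true to force rasterizer discard off regardless of
 * the bound rasterizer state; returns whether the pipeline value changed
 */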
bool
zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
{
   bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
   bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
   ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
   if (!changed)
      return false;
   if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
      ctx->gfx_pipeline_state.dirty |= true;
   ctx->rasterizer_discard_changed = true;
   return true;
}

void
zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
                           struct util_queue_fence *fence,
                           pipe_driver_thread_func execute,
                           pipe_driver_thread_func cleanup,
                           const size_t job_size)
{
   struct zink_screen *screen = zink_screen(pscreen);
   util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
}

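/* enables shader-based emulation of line stipple, line smoothing, and GL
 * point sprites on drivers lacking native support: when no app-supplied GS
 * is bound, a passthrough geometry shader is generated and bound to carry
 * the lowering
 */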
void
zink_set_primitive_emulation_keys(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_LINES &&
                             screen->driver_workarounds.no_linestipple &&
                             ctx->rast_state->base.line_stipple_enable &&
                             !ctx->num_so_targets;

   if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
      assert(zink_get_gs_key(ctx)->lower_line_stipple ==
             zink_get_fs_key(ctx)->lower_line_stipple);
      zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
      zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
   }

   bool lower_line_smooth = screen->driver_workarounds.no_linesmooth &&
                            ctx->rast_state->base.line_smooth &&
                            !ctx->num_so_targets;

   if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
      assert(zink_get_gs_key(ctx)->lower_line_smooth ==
             zink_get_fs_key(ctx)->lower_line_smooth);
      zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
      zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
   }

   if (lower_line_stipple || lower_line_smooth ||
       zink_get_gs_key(ctx)->lower_gl_point) {
      gl_shader_stage prev_vertex_stage =
         ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
            MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;

      if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY]) {
         assert(!screen->optimal_keys);

         if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs) {
            nir_shader *nir = nir_create_passthrough_gs(
               &screen->nir_options,
               ctx->gfx_stages[prev_vertex_stage]->nir,
               (lower_line_stipple || lower_line_smooth) ? SHADER_PRIM_LINE_STRIP : SHADER_PRIM_POINTS,
               (lower_line_stipple || lower_line_smooth) ? 2 : 1);

            struct zink_shader *shader = zink_shader_create(screen, nir, NULL);
            ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs = shader;
            shader->non_fs.is_generated = true;
         }

         bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY,
                        ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs);
      }
   } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
              ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
      bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
}