src/gallium/drivers/zink/zink_program.c

   1 /*
   2  * Copyright 2018 Collabora Ltd.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "zink_program.h"
  25
  26 #include "zink_compiler.h"
  27 #include "zink_context.h"
  28 #include "zink_descriptors.h"
  29 #include "zink_helpers.h"
  30 #include "zink_pipeline.h"
  31 #include "zink_render_pass.h"
  32 #include "zink_resource.h"
  33 #include "zink_screen.h"
  34 #include "zink_state.h"
  35 #include "zink_inlines.h"
  36
  37 #include "util/u_debug.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_prim.h"
  40 #include "nir_serialize.h"
  41 #include "nir/nir_draw_helpers.h"
  42
  43 /* for pipeline cache */
  44 #define XXH_INLINE_ALL
  45 #include "util/xxhash.h"
  46
  47 static void
  48 precompile_job(void *data, void *gdata, int thread_index);
  49 struct zink_gfx_program *
  50 create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);
  51
  52 void
  53 debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
  54 {
  55    sprintf(buf, "zink_gfx_program");
  56 }
  57
  58 void
  59 debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
  60 {
  61    sprintf(buf, "zink_compute_program");
  62 }
  63
  64 ALWAYS_INLINE static bool
  65 shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
  66 {
  67    if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
  68        zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
  69       return false;
  70    const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
  71    return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
  72           (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
  73                                     key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
  74 }
  75
  76 ALWAYS_INLINE static bool
  77 shader_key_matches(const struct zink_shader_module *zm,
  78                    const struct zink_shader_key *key, unsigned num_uniforms,
  79                    bool has_inline, bool has_nonseamless)
  80 {
  81    const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
  82    if (has_inline) {
  83       if (zm->num_uniforms != num_uniforms ||
  84           (num_uniforms &&
  85            memcmp(zm->key + zm->key_size + nonseamless_size,
  86                   key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
  87          return false;
  88    }
  89    if (!has_nonseamless) {
  90       if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
  91           (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
  92          return false;
  93    }
  94    if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
  95       return false;
  96    return !memcmp(zm->key, key, zm->key_size);
  97 }
  98
  99 static uint32_t
 100 shader_module_hash(const struct zink_shader_module *zm)
 101 {
 102    const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
 103    unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
 104    return _mesa_hash_data(zm->key, key_size);
 105 }
 106
 107 ALWAYS_INLINE static void
 108 gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
 109                           struct zink_shader *zs, struct zink_gfx_program *prog,
 110                           struct zink_gfx_pipeline_state *state,
 111                           bool has_inline, //is inlining enabled?
 112                           bool has_nonseamless, //is nonseamless ext present?
 113                           unsigned *inline_size, unsigned *nonseamless_size)
 114 {
 115    gl_shader_stage stage = zs->nir->info.stage;
 116    struct zink_shader_key *key = &state->shader_keys.key[stage];
 117    if (has_inline && ctx && zs->nir->info.num_inlinable_uniforms &&
 118        ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
 119       if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
 120          *inline_size = zs->nir->info.num_inlinable_uniforms;
 121       else
 122          key->inline_uniforms = false;
 123    }
 124    if (!has_nonseamless && key->base.nonseamless_cube_mask)
 125       *nonseamless_size = sizeof(uint32_t);
 126 }
 127
 128 ALWAYS_INLINE static struct zink_shader_module *
 129 create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
 130                                struct zink_shader *zs, struct zink_gfx_program *prog,
 131                                gl_shader_stage stage,
 132                                struct zink_gfx_pipeline_state *state,
 133                                unsigned inline_size, unsigned nonseamless_size,
 134                                bool has_inline, //is inlining enabled?
 135                                bool has_nonseamless) //is nonseamless ext present?
 136 {
 137    VkShaderModule mod;
 138    struct zink_shader_module *zm;
 139    const struct zink_shader_key *key = &state->shader_keys.key[stage];
 140    /* non-generated tcs won't use the shader key */
 141    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 142    const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle ||
 143                                             (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle);
 144    zm = malloc(sizeof(struct zink_shader_module) + key->size +
 145                (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
 146                (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0));
 147    if (!zm) {
 148       return NULL;
 149    }
 150    unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL ].key.tcs.patch_vertices;
 151    if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
 152       assert(ctx); //TODO async
 153       mod = zink_shader_tcs_compile(screen, zs, patch_vertices);
 154    } else {
 155       mod = zink_shader_compile(screen, zs, prog->nir[stage], key, &ctx->di.zs_swizzle[stage]);
 156    }
 157    if (!mod) {
 158       FREE(zm);
 159       return NULL;
 160    }
 161    zm->shader = mod;
 162    zm->num_uniforms = inline_size;
 163    if (!is_nongenerated_tcs) {
 164       zm->key_size = key->size;
 165       memcpy(zm->key, key, key->size);
 166    } else {
 167       zm->key_size = 0;
 168       memset(zm->key, 0, key->size);
 169    }
 170    if (!has_nonseamless && nonseamless_size) {
 171       /* nonseamless mask gets added to base key if it exists */
 172       memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
 173    }
 174    zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle;
 175    zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
 176    if (inline_size)
 177       memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
 178    if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
 179       zm->hash = patch_vertices;
 180    else
 181       zm->hash = shader_module_hash(zm);
 182    if (unlikely(shadow_needs_shader_swizzle)) {
 183       memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
 184       zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
 185    }
 186    zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
 187    if (inline_size)
 188       prog->inlined_variant_count[stage]++;
 189    util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
 190    return zm;
 191 }
 192
 193 ALWAYS_INLINE static struct zink_shader_module *
 194 get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
 195                             struct zink_shader *zs, struct zink_gfx_program *prog,
 196                             gl_shader_stage stage,
 197                             struct zink_gfx_pipeline_state *state,
 198                             unsigned inline_size, unsigned nonseamless_size,
 199                             bool has_inline, //is inlining enabled?
 200                             bool has_nonseamless) //is nonseamless ext present?
 201 {
 202    const struct zink_shader_key *key = &state->shader_keys.key[stage];
 203    /* non-generated tcs won't use the shader key */
 204    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 205    const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) ||
 206                                             (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle));
 207
 208    struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
 209    unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 210    struct zink_shader_module **pzm = shader_cache->data;
 211    for (unsigned i = 0; i < count; i++) {
 212       struct zink_shader_module *iter = pzm[i];
 213       if (is_nongenerated_tcs) {
 214          if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
 215             continue;
 216       } else {
 217          if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
 218             continue;
 219          if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
 220             continue;
 221          if (unlikely(shadow_needs_shader_swizzle)) {
 222             /* shadow swizzle data needs a manual compare since it's so fat */
 223             if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
 224                        &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
 225                continue;
 226          }
 227       }
 228       if (i > 0) {
 229          struct zink_shader_module *zero = pzm[0];
 230          pzm[0] = iter;
 231          pzm[i] = zero;
 232       }
 233       return iter;
 234    }
 235
 236    return NULL;
 237 }
 238
 239 ALWAYS_INLINE static struct zink_shader_module *
 240 create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
 241                                        struct zink_shader *zs, struct zink_gfx_program *prog,
 242                                        gl_shader_stage stage,
 243                                        struct zink_gfx_pipeline_state *state)
 244 {
 245    VkShaderModule mod;
 246    struct zink_shader_module *zm;
 247    uint16_t *key;
 248    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
 249    bool shadow_needs_shader_swizzle = false;
 250    if (zs == prog->last_vertex_stage) {
 251       key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
 252    } else if (stage == MESA_SHADER_FRAGMENT) {
 253       key = (uint16_t*)&state->shader_keys_optimal.key.fs;
 254       shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
 255    } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
 256       key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
 257    } else {
 258       key = NULL;
 259    }
 260    size_t key_size = sizeof(uint16_t);
 261    zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0));
 262    if (!zm) {
 263       return NULL;
 264    }
 265    if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
 266       assert(ctx); //TODO async
 267       struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
 268       mod = zink_shader_tcs_compile(screen, zs, tcs->patch_vertices);
 269    } else {
 270       mod = zink_shader_compile(screen, zs, prog->nir[stage], (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL);
 271    }
 272    if (!mod) {
 273       FREE(zm);
 274       return NULL;
 275    }
 276    zm->shader = mod;
 277    /* non-generated tcs won't use the shader key */
 278    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 279    if (key && !is_nongenerated_tcs) {
 280       zm->key_size = key_size;
 281       uint16_t *data = (uint16_t*)zm->key;
 282       /* sanitize actual key bits */
 283       *data = (*key) & mask;
 284       if (unlikely(shadow_needs_shader_swizzle))
 285          memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
 286    }
 287    zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
 288    util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
 289    return zm;
 290 }
 291
 292 ALWAYS_INLINE static struct zink_shader_module *
 293 get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
 294                                     struct zink_shader *zs, struct zink_gfx_program *prog,
 295                                     gl_shader_stage stage,
 296                                     struct zink_gfx_pipeline_state *state)
 297 {
 298    /* non-generated tcs won't use the shader key */
 299    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
 300    bool shadow_needs_shader_swizzle = false;
 301    uint16_t *key;
 302    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
 303    if (zs == prog->last_vertex_stage) {
 304       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
 305    } else if (stage == MESA_SHADER_FRAGMENT) {
 306       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
 307       shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
 308    } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
 309       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
 310    } else {
 311       key = NULL;
 312    }
 313    struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
 314    unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 315    struct zink_shader_module **pzm = shader_cache->data;
 316    for (unsigned i = 0; i < count; i++) {
 317       struct zink_shader_module *iter = pzm[i];
 318       if (is_nongenerated_tcs) {
 319          /* always match */
 320       } else if (key) {
 321          uint16_t val = (*key) & mask;
 322          /* no key is bigger than uint16_t */
 323          if (memcmp(iter->key, &val, sizeof(uint16_t)))
 324             continue;
 325          if (unlikely(shadow_needs_shader_swizzle)) {
 326             /* shadow swizzle data needs a manual compare since it's so fat */
 327             if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
 328                continue;
 329          }
 330       }
 331       if (i > 0) {
 332          struct zink_shader_module *zero = pzm[0];
 333          pzm[0] = iter;
 334          pzm[i] = zero;
 335       }
 336       return iter;
 337    }
 338
 339    return NULL;
 340 }
 341
 342 static void
 343 zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
 344 {
 345    VKSCR(DestroyShaderModule)(screen->dev, zm->shader, NULL);
 346    free(zm);
 347 }
 348
 349 static void
 350 destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
 351 {
 352    while (util_dynarray_contains(sc, void*)) {
 353       struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
 354       zink_destroy_shader_module(screen, zm);
 355    }
 356 }
 357
 358 ALWAYS_INLINE static void
 359 update_gfx_shader_modules(struct zink_context *ctx,
 360                       struct zink_screen *screen,
 361                       struct zink_gfx_program *prog, uint32_t mask,
 362                       struct zink_gfx_pipeline_state *state,
 363                       bool has_inline, //is inlining enabled?
 364                       bool has_nonseamless) //is nonseamless ext present?
 365 {
 366    bool hash_changed = false;
 367    bool default_variants = true;
 368    assert(prog->modules[MESA_SHADER_VERTEX]);
 369    uint32_t variant_hash = prog->last_variant_hash;
 370    prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
 371    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 372       if (!(mask & BITFIELD_BIT(i)))
 373          continue;
 374
 375       assert(prog->shaders[i]);
 376
 377       unsigned inline_size = 0, nonseamless_size = 0;
 378       gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
 379       struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 380                                                                   inline_size, nonseamless_size, has_inline, has_nonseamless);
 381       if (!zm)
 382          zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 383                                              inline_size, nonseamless_size, has_inline, has_nonseamless);
 384       state->modules[i] = zm->shader;
 385       if (prog->modules[i] == zm->shader)
 386          continue;
 387       variant_hash ^= prog->module_hash[i];
 388       hash_changed = true;
 389       default_variants &= zm->default_variant;
 390       prog->modules[i] = zm->shader;
 391       prog->module_hash[i] = zm->hash;
 392       if (has_inline) {
 393          if (zm->num_uniforms)
 394             prog->inline_variants |= BITFIELD_BIT(i);
 395          else
 396             prog->inline_variants &= ~BITFIELD_BIT(i);
 397       }
 398       variant_hash ^= prog->module_hash[i];
 399    }
 400
 401    if (hash_changed && state) {
 402       if (default_variants)
 403          prog->last_variant_hash = prog->default_variant_hash;
 404       else
 405          prog->last_variant_hash = variant_hash;
 406
 407       state->modules_changed = true;
 408    }
 409 }
 410
 411 static void
 412 generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
 413 {
 414    assert(!prog->modules[MESA_SHADER_VERTEX]);
 415    uint32_t variant_hash = 0;
 416    bool default_variants = true;
 417    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 418       if (!(prog->stages_present & BITFIELD_BIT(i)))
 419          continue;
 420
 421       assert(prog->shaders[i]);
 422
 423       unsigned inline_size = 0, nonseamless_size = 0;
 424       gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
 425                                 screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
 426                                 &inline_size, &nonseamless_size);
 427       struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 428                                                                      inline_size, nonseamless_size,
 429                                                                      screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
 430       state->modules[i] = zm->shader;
 431       prog->modules[i] = zm->shader;
 432       prog->module_hash[i] = zm->hash;
 433       if (zm->num_uniforms)
 434          prog->inline_variants |= BITFIELD_BIT(i);
 435       default_variants &= zm->default_variant;
 436       variant_hash ^= prog->module_hash[i];
 437    }
 438
 439    p_atomic_dec(&prog->base.reference.count);
 440    state->modules_changed = true;
 441
 442    prog->last_variant_hash = variant_hash;
 443    if (default_variants)
 444       prog->default_variant_hash = prog->last_variant_hash;
 445 }
 446
 447 static void
 448 generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
 449 {
 450    assert(!prog->modules[MESA_SHADER_VERTEX]);
 451    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 452       if (!(prog->stages_present & BITFIELD_BIT(i)))
 453          continue;
 454
 455       assert(prog->shaders[i]);
 456
 457       struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
 458       prog->modules[i] = zm->shader;
 459    }
 460
 461    p_atomic_dec(&prog->base.reference.count);
 462    state->modules_changed = true;
 463    prog->last_variant_hash = state->shader_keys_optimal.key.val;
 464 }
 465
 466 static uint32_t
 467 hash_pipeline_lib_generated_tcs(const void *key)
 468 {
 469    const struct zink_gfx_library_key *gkey = key;
 470    return gkey->optimal_key;
 471 }
 472
 473
 474 static bool
 475 equals_pipeline_lib_generated_tcs(const void *a, const void *b)
 476 {
 477    return !memcmp(a, b, sizeof(uint32_t));
 478 }
 479
 480 static uint32_t
 481 hash_pipeline_lib(const void *key)
 482 {
 483    const struct zink_gfx_library_key *gkey = key;
 484    /* remove generated tcs bits */
 485    return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
 486 }
 487
 488 static bool
 489 equals_pipeline_lib(const void *a, const void *b)
 490 {
 491    const struct zink_gfx_library_key *ak = a;
 492    const struct zink_gfx_library_key *bk = b;
 493    /* remove generated tcs bits */
 494    uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
 495    uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
 496    return val_a == val_b;
 497 }
 498
 499 uint32_t
 500 hash_gfx_input_dynamic(const void *key)
 501 {
 502    const struct zink_gfx_input_key *ikey = key;
 503    return ikey->idx;
 504 }
 505
 506 static bool
 507 equals_gfx_input_dynamic(const void *a, const void *b)
 508 {
 509    const struct zink_gfx_input_key *ikey_a = a;
 510    const struct zink_gfx_input_key *ikey_b = b;
 511    return ikey_a->idx == ikey_b->idx;
 512 }
 513
 514 uint32_t
 515 hash_gfx_input(const void *key)
 516 {
 517    const struct zink_gfx_input_key *ikey = key;
 518    if (ikey->uses_dynamic_stride)
 519       return ikey->input;
 520    return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
 521 }
 522
 523 static bool
 524 equals_gfx_input(const void *a, const void *b)
 525 {
 526    const struct zink_gfx_input_key *ikey_a = a;
 527    const struct zink_gfx_input_key *ikey_b = b;
 528    if (ikey_a->uses_dynamic_stride)
 529       return ikey_a->element_state == ikey_b->element_state &&
 530              !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
 531    return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
 532 }
 533
 534 uint32_t
 535 hash_gfx_output_ds3(const void *key)
 536 {
 537    const uint8_t *data = key;
 538    return _mesa_hash_data(data, sizeof(uint32_t));
 539 }
 540
 541 static bool
 542 equals_gfx_output_ds3(const void *a, const void *b)
 543 {
 544    const uint8_t *da = a;
 545    const uint8_t *db = b;
 546    return !memcmp(da, db, sizeof(uint32_t));
 547 }
 548
 549 uint32_t
 550 hash_gfx_output(const void *key)
 551 {
 552    const uint8_t *data = key;
 553    return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
 554 }
 555
 556 static bool
 557 equals_gfx_output(const void *a, const void *b)
 558 {
 559    const uint8_t *da = a;
 560    const uint8_t *db = b;
 561    return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
 562 }
 563
 564 ALWAYS_INLINE static void
 565 update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
 566 {
 567    struct zink_screen *screen = zink_screen(ctx->base.screen);
 568    if (screen->driconf.inline_uniforms)
 569       update_gfx_shader_modules(ctx, screen, prog,
 570                                 ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
 571                                 true, has_nonseamless);
 572    else
 573       update_gfx_shader_modules(ctx, screen, prog,
 574                                 ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
 575                                 false, has_nonseamless);
 576 }
 577
 578 static void
 579 update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
 580 {
 581    struct zink_screen *screen = zink_screen(ctx->base.screen);
 582    if (screen->info.have_EXT_non_seamless_cube_map)
 583       update_gfx_program_nonseamless(ctx, prog, true);
 584    else
 585       update_gfx_program_nonseamless(ctx, prog, false);
 586 }
 587
 588 void
 589 zink_gfx_program_update(struct zink_context *ctx)
 590 {
 591    if (ctx->last_vertex_stage_dirty) {
 592       gl_shader_stage pstage = ctx->last_vertex_stage->nir->info.stage;
 593       ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
 594       memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
 595              &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
 596              sizeof(struct zink_vs_key_base));
 597       ctx->last_vertex_stage_dirty = false;
 598    }
 599    if (ctx->gfx_dirty) {
 600       struct zink_gfx_program *prog = NULL;
 601
 602       simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 603       struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 604       const uint32_t hash = ctx->gfx_hash;
 605       struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 606       /* this must be done before prog is updated */
 607       if (ctx->curr_program)
 608          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 609       if (entry) {
 610          prog = (struct zink_gfx_program*)entry->data;
 611          for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
 612             if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
 613                ctx->gfx_pipeline_state.modules[i] = prog->modules[i];
 614          }
 615          /* ensure variants are always updated if keys have changed since last use */
 616          ctx->dirty_gfx_stages |= prog->stages_present;
 617          update_gfx_program(ctx, prog);
 618       } else {
 619          ctx->dirty_gfx_stages |= ctx->shader_stages;
 620          prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
 621          zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
 622          _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
 623          prog->base.removed = false;
 624          generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
 625       }
 626       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 627       if (prog && prog != ctx->curr_program)
 628          zink_batch_reference_program(&ctx->batch, &prog->base);
 629       ctx->curr_program = prog;
 630       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 631       ctx->gfx_dirty = false;
 632    } else if (ctx->dirty_gfx_stages) {
 633       /* remove old hash */
 634       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 635       update_gfx_program(ctx, ctx->curr_program);
 636       /* apply new hash */
 637       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 638    }
 639    ctx->dirty_gfx_stages = 0;
 640 }
 641
 642 ALWAYS_INLINE static bool
 643 update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
 644 {
 645    struct zink_screen *screen = zink_screen(ctx->base.screen);
 646    if (screen->info.have_EXT_graphics_pipeline_library)
 647       util_queue_fence_wait(&prog->base.cache_fence);
 648    struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
 649    if (!zm)
 650       zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
 651
 652    bool changed = prog->modules[pstage] != zm->shader;
 653    prog->modules[pstage] = zm->shader;
 654    return changed;
 655 }
 656
 657 static void
 658 update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
 659 {
 660    const union zink_shader_key_optimal *optimal_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
 661    if (ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_bits != optimal_key->vs_bits) {
 662       assert(!prog->is_separable);
 663       bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage);
 664       ctx->gfx_pipeline_state.modules_changed |= changed;
 665    }
 666    const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
 667    if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
 668        /* always recheck shadow swizzles since they aren't directly part of the key */
 669        unlikely(shadow_needs_shader_swizzle)) {
 670       assert(!prog->is_separable);
 671       bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
 672       ctx->gfx_pipeline_state.modules_changed |= changed;
 673       if (unlikely(shadow_needs_shader_swizzle)) {
 674          struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
 675          ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t);
 676       }
 677    }
 678    if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
 679        ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
 680       assert(!prog->is_separable);
 681       bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
 682       ctx->gfx_pipeline_state.modules_changed |= changed;
 683    }
 684    prog->last_variant_hash = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 685 }
 686
 687 void
 688 zink_gfx_program_update_optimal(struct zink_context *ctx)
 689 {
 690    if (ctx->gfx_dirty) {
 691       struct zink_gfx_program *prog = NULL;
 692       ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 693       struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 694       const uint32_t hash = ctx->gfx_hash;
 695       simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 696       struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 697
 698       if (ctx->curr_program)
 699          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 700       if (entry) {
 701          prog = (struct zink_gfx_program*)entry->data;
 702          if (prog->is_separable) {
 703             /* shader variants can't be handled by separable programs: sync and compile */
 704             if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))
 705                util_queue_fence_wait(&prog->base.cache_fence);
 706             /* If the optimized linked pipeline is done compiling, swap it into place. */
 707             if (util_queue_fence_is_signalled(&prog->base.cache_fence)) {
 708                struct zink_gfx_program *real = prog->full_prog;
 709                entry->data = real;
 710                entry->key = real->shaders;
 711                real->base.removed = false;
 712                prog->full_prog = NULL;
 713                prog->base.removed = true;
 714                zink_gfx_program_reference(zink_screen(ctx->base.screen), &prog, NULL);
 715                prog = real;
 716             }
 717          }
 718          update_gfx_program_optimal(ctx, prog);
 719       } else {
 720          ctx->dirty_gfx_stages |= ctx->shader_stages;
 721          prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
 722          prog->base.removed = false;
 723          _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
 724          if (!prog->is_separable) {
 725             zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
 726             generate_gfx_program_modules_optimal(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
 727          }
 728       }
 729       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 730       if (prog && prog != ctx->curr_program)
 731          zink_batch_reference_program(&ctx->batch, &prog->base);
 732       ctx->curr_program = prog;
 733       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 734    } else if (ctx->dirty_gfx_stages) {
 735       /* remove old hash */
 736       ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 737       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 738       if (ctx->curr_program->is_separable) {
 739          struct zink_gfx_program *prog = ctx->curr_program;
 740          if (prog->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key)) {
 741             util_queue_fence_wait(&prog->base.cache_fence);
 742             /* shader variants can't be handled by separable programs: sync and compile */
 743             struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 744             const uint32_t hash = ctx->gfx_hash;
 745             simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 746             struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 747             struct zink_gfx_program *real = prog->full_prog;
 748             entry->data = real;
 749             entry->key = real->shaders;
 750             real->base.removed = false;
 751             prog->full_prog = NULL;
 752             prog->base.removed = true;
 753             zink_gfx_program_reference(zink_screen(ctx->base.screen), &prog, NULL);
 754             ctx->curr_program = real;
 755             simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 756          }
 757       }
 758       update_gfx_program_optimal(ctx, ctx->curr_program);
 759       /* apply new hash */
 760       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 761    }
 762    ctx->dirty_gfx_stages = 0;
 763    ctx->gfx_dirty = false;
 764    ctx->last_vertex_stage_dirty = false;
 765 }
 766
 767 static void
 768 optimized_compile_job(void *data, void *gdata, int thread_index)
 769 {
 770    struct zink_gfx_pipeline_cache_entry *pc_entry = data;
 771    struct zink_screen *screen = gdata;
 772    VkPipeline pipeline;
 773    if (pc_entry->gkey)
 774       pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->ikey->pipeline, &pc_entry->gkey->pipeline, 1, pc_entry->okey->pipeline, true);
 775    else
 776       pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
 777    if (pipeline) {
 778       pc_entry->unoptimized_pipeline = pc_entry->pipeline;
 779       pc_entry->pipeline = pipeline;
 780    }
 781 }
 782
 783 void
 784 zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
 785 {
 786    util_queue_add_job(&zink_screen(ctx->base.screen)->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
 787 }
 788
 789 static void
 790 update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
 791 {
 792    struct zink_screen *screen = zink_screen(ctx->base.screen);
 793    struct zink_shader *zs = comp->shader;
 794    VkShaderModule mod;
 795    struct zink_shader_module *zm = NULL;
 796    unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0;
 797    struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
 798    ASSERTED bool check_robustness = screen->driver_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
 799    assert(zink_cs_key(key)->robust_access == check_robustness);
 800
 801    if (ctx && zs->nir->info.num_inlinable_uniforms &&
 802        ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
 803       if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
 804          inline_size = zs->nir->info.num_inlinable_uniforms;
 805       else
 806          key->inline_uniforms = false;
 807    }
 808    if (key->base.nonseamless_cube_mask)
 809       nonseamless_size = sizeof(uint32_t);
 810    if (key->base.needs_zs_shader_swizzle)
 811       zs_swizzle_size = sizeof(struct zink_zs_swizzle_key);
 812
 813    if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) {
 814       struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
 815       unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 816       struct zink_shader_module **pzm = shader_cache->data;
 817       for (unsigned i = 0; i < count; i++) {
 818          struct zink_shader_module *iter = pzm[i];
 819          if (!shader_key_matches(iter, key, inline_size,
 820                                  screen->driconf.inline_uniforms,
 821                                  screen->info.have_EXT_non_seamless_cube_map))
 822             continue;
 823          if (unlikely(zs_swizzle_size)) {
 824             /* zs swizzle data needs a manual compare since it's so fat */
 825             if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t),
 826                        &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size))
 827                continue;
 828          }
 829          if (i > 0) {
 830             struct zink_shader_module *zero = pzm[0];
 831             pzm[0] = iter;
 832             pzm[i] = zero;
 833          }
 834          zm = iter;
 835       }
 836    } else {
 837       zm = comp->module;
 838    }
 839
 840    if (!zm) {
 841       zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size);
 842       if (!zm) {
 843          return;
 844       }
 845       mod = zink_shader_compile(screen, zs, comp->shader->nir, key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL);
 846       if (!mod) {
 847          FREE(zm);
 848          return;
 849       }
 850       zm->shader = mod;
 851       zm->num_uniforms = inline_size;
 852       zm->key_size = key->size;
 853       memcpy(zm->key, key, key->size);
 854       zm->has_nonseamless = !!nonseamless_size;
 855       zm->needs_zs_shader_swizzle = !!zs_swizzle_size;
 856       assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size);
 857       if (nonseamless_size)
 858          memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
 859       if (inline_size)
 860          memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
 861       if (zs_swizzle_size)
 862          memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size);
 863
 864       zm->hash = shader_module_hash(zm);
 865       zm->default_variant = false;
 866       if (inline_size)
 867          comp->inlined_variant_count++;
 868
 869       /* this is otherwise the default variant, which is stored as comp->module */
 870       if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size)
 871          util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
 872    }
 873    if (comp->curr == zm)
 874       return;
 875    ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
 876    comp->curr = zm;
 877    ctx->compute_pipeline_state.module_hash = zm->hash;
 878    ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
 879    ctx->compute_pipeline_state.module_changed = true;
 880 }
 881
 882 void
 883 zink_update_compute_program(struct zink_context *ctx)
 884 {
 885    util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
 886    update_cs_shader_module(ctx, ctx->curr_compute);
 887 }
 888
 889 VkPipelineLayout
 890 zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
 891 {
 892    VkPipelineLayoutCreateInfo plci = {0};
 893    plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
 894    plci.flags = flags;
 895
 896    plci.pSetLayouts = dsl;
 897    plci.setLayoutCount = num_dsl;
 898
 899    VkPushConstantRange pcr;
 900    if (!is_compute) {
 901       pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
 902       pcr.offset = 0;
 903       pcr.size = sizeof(struct zink_gfx_push_constant);
 904       plci.pushConstantRangeCount = 1;
 905       plci.pPushConstantRanges = &pcr;
 906    }
 907
 908    VkPipelineLayout layout;
 909    VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
 910    if (result != VK_SUCCESS) {
 911       mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
 912       return VK_NULL_HANDLE;
 913    }
 914
 915    return layout;
 916 }
 917
 918 static void *
 919 create_program(struct zink_context *ctx, bool is_compute)
 920 {
 921    struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
 922    if (!pg)
 923       return NULL;
 924
 925    pipe_reference_init(&pg->reference, 1);
 926    util_queue_fence_init(&pg->cache_fence);
 927    pg->is_compute = is_compute;
 928    pg->ctx = ctx;
 929    return (void*)pg;
 930 }
 931
 932 static void
 933 assign_io(struct zink_screen *screen,
 934           struct zink_gfx_program *prog,
 935           struct zink_shader *stages[ZINK_GFX_SHADER_COUNT])
 936 {
 937    struct zink_shader *shaders[MESA_SHADER_STAGES];
 938
 939    /* build array in pipeline order */
 940    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
 941       shaders[i] = stages[i];
 942
 943    for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
 944       nir_shader *producer = shaders[i]->nir;
 945       for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
 946          struct zink_shader *consumer = shaders[j];
 947          if (!consumer)
 948             continue;
 949          if (!prog->nir[producer->info.stage])
 950             prog->nir[producer->info.stage] = nir_shader_clone(prog, producer);
 951          if (!prog->nir[j])
 952             prog->nir[j] = nir_shader_clone(prog, consumer->nir);
 953          zink_compiler_assign_io(screen, prog->nir[producer->info.stage], prog->nir[j]);
 954          i = j;
 955          break;
 956       }
 957    }
 958 }
 959
 960 void
 961 zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
 962 {
 963    if (!p_atomic_dec_zero(&libs->refcount))
 964       return;
 965
 966    simple_mtx_destroy(&libs->lock);
 967    set_foreach_remove(&libs->libs, he) {
 968       struct zink_gfx_library_key *gkey = (void*)he->key;
 969       VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
 970       FREE(gkey);
 971    }
 972    ralloc_free(libs);
 973 }
 974
 975 static struct zink_gfx_lib_cache *
 976 create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
 977 {
 978    struct zink_gfx_lib_cache *libs = rzalloc(NULL, struct zink_gfx_lib_cache);
 979    simple_mtx_init(&libs->lock, mtx_plain);
 980    if (generated_tcs)
 981       _mesa_set_init(&libs->libs, libs, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
 982    else
 983       _mesa_set_init(&libs->libs, libs, hash_pipeline_lib, equals_pipeline_lib);
 984    return libs;
 985 }
 986
 987 static struct zink_gfx_lib_cache *
 988 find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
 989 {
 990    unsigned stages_present = prog->stages_present;
 991    bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
 992    if (generated_tcs)
 993       stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
 994    unsigned idx = zink_program_cache_stages(stages_present);
 995    struct set *ht = &screen->pipeline_libs[idx];
 996    const uint32_t hash = prog->gfx_hash;
 997
 998    simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
 999    bool found = false;
1000    struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
1001    struct zink_gfx_lib_cache *libs;
1002    if (found) {
1003       libs = (void*)entry->key;
1004    } else {
1005       libs = create_lib_cache(prog, generated_tcs);
1006       memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
1007       entry->key = libs;
1008       unsigned refs = 0;
1009       for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
1010          if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
1011             simple_mtx_lock(&prog->shaders[i]->lock);
1012             util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
1013             simple_mtx_unlock(&prog->shaders[i]->lock);
1014             refs++;
1015          }
1016       }
1017       p_atomic_set(&libs->refcount, refs);
1018    }
1019    simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
1020    return libs;
1021 }
1022
1023 struct zink_gfx_program *
1024 zink_create_gfx_program(struct zink_context *ctx,
1025                         struct zink_shader **stages,
1026                         unsigned vertices_per_patch,
1027                         uint32_t gfx_hash)
1028 {
1029    struct zink_screen *screen = zink_screen(ctx->base.screen);
1030    struct zink_gfx_program *prog = create_program(ctx, false);
1031    if (!prog)
1032       goto fail;
1033
1034    prog->ctx = ctx;
1035    prog->gfx_hash = gfx_hash;
1036    prog->base.removed = true;
1037
1038    prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] &&
1039                          prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
1040    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1041       util_dynarray_init(&prog->shader_cache[i][0][0], prog);
1042       util_dynarray_init(&prog->shader_cache[i][0][1], prog);
1043       util_dynarray_init(&prog->shader_cache[i][1][0], prog);
1044       util_dynarray_init(&prog->shader_cache[i][1][1], prog);
1045       if (stages[i]) {
1046          prog->shaders[i] = stages[i];
1047          prog->stages_present |= BITFIELD_BIT(i);
1048       }
1049    }
1050    if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
1051       prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs =
1052       prog->shaders[MESA_SHADER_TESS_CTRL] =
1053         zink_shader_tcs_create(screen, stages[MESA_SHADER_VERTEX], vertices_per_patch);
1054       prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
1055    }
1056    prog->stages_remaining = prog->stages_present;
1057
1058    assign_io(screen, prog, prog->shaders);
1059
1060    if (stages[MESA_SHADER_GEOMETRY])
1061       prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
1062    else if (stages[MESA_SHADER_TESS_EVAL])
1063       prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
1064    else
1065       prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
1066
1067    for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
1068       for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
1069          _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
1070          /* only need first 3/4 for point/line/tri/patch */
1071          if (screen->info.have_EXT_extended_dynamic_state &&
1072              i == (prog->last_vertex_stage->nir->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
1073             break;
1074       }
1075    }
1076
1077    if (screen->optimal_keys)
1078       prog->libs = find_or_create_lib_cache(screen, prog);
1079
1080    struct mesa_sha1 sctx;
1081    _mesa_sha1_init(&sctx);
1082    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1083       if (prog->shaders[i]) {
1084          simple_mtx_lock(&prog->shaders[i]->lock);
1085          _mesa_set_add(prog->shaders[i]->programs, prog);
1086          simple_mtx_unlock(&prog->shaders[i]->lock);
1087          zink_gfx_program_reference(screen, NULL, prog);
1088          _mesa_sha1_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
1089       }
1090    }
1091    _mesa_sha1_final(&sctx, prog->base.sha1);
1092
1093    if (!zink_descriptor_program_init(ctx, &prog->base))
1094       goto fail;
1095
1096    return prog;
1097
1098 fail:
1099    if (prog)
1100       zink_destroy_gfx_program(screen, prog);
1101    return NULL;
1102 }
1103
1104 /* Creates a replacement, optimized zink_gfx_program for this set of separate shaders, which will
1105  * be swapped in in place of the fast-linked separable program once it's done compiling.
1106  */
1107 static void
1108 create_linked_separable_job(void *data, void *gdata, int thread_index)
1109 {
1110    struct zink_gfx_program *prog = data;
1111    prog->full_prog = zink_create_gfx_program(prog->ctx, prog->shaders, 0, prog->gfx_hash);
1112    precompile_job(prog->full_prog, gdata, thread_index);
1113 }
1114
1115 struct zink_gfx_program *
1116 create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
1117 {
1118    struct zink_screen *screen = zink_screen(ctx->base.screen);
1119    unsigned shader_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT);
1120    /* filter cases that need real pipelines */
1121    if (ctx->shader_stages != shader_stages ||
1122        !stages[MESA_SHADER_VERTEX]->precompile.mod || !stages[MESA_SHADER_FRAGMENT]->precompile.mod ||
1123        /* TODO: maybe try variants? grimace */
1124        !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
1125        !zink_can_use_pipeline_libs(ctx))
1126       return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
1127    /* ensure async gpl creation is done */
1128    util_queue_fence_wait(&stages[MESA_SHADER_VERTEX]->precompile.fence);
1129    util_queue_fence_wait(&stages[MESA_SHADER_FRAGMENT]->precompile.fence);
1130
1131    struct zink_gfx_program *prog = create_program(ctx, false);
1132    if (!prog)
1133       goto fail;
1134
1135    prog->ctx = ctx;
1136    prog->is_separable = true;
1137    prog->gfx_hash = ctx->gfx_hash;
1138
1139    prog->shaders[MESA_SHADER_VERTEX] = stages[MESA_SHADER_VERTEX];
1140    prog->stages_remaining = prog->stages_present = shader_stages;
1141    prog->shaders[MESA_SHADER_FRAGMENT] = stages[MESA_SHADER_FRAGMENT];
1142    prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
1143    prog->libs = create_lib_cache(prog, false);
1144    /* this libs cache is owned by the program */
1145    p_atomic_set(&prog->libs->refcount, 1);
1146
1147    unsigned refs = 0;
1148    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1149       if (prog->shaders[i]) {
1150          simple_mtx_lock(&prog->shaders[i]->lock);
1151          _mesa_set_add(prog->shaders[i]->programs, prog);
1152          simple_mtx_unlock(&prog->shaders[i]->lock);
1153          refs++;
1154       }
1155    }
1156    /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
1157    * referenced by the draw state and zink_shader_free() can't be called on them while we're in here.
1158    */
1159    p_atomic_add(&prog->base.reference.count, refs);
1160
1161    for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
1162       for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
1163          _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
1164          /* only need first 3/4 for point/line/tri/patch */
1165          if (screen->info.have_EXT_extended_dynamic_state &&
1166              i == (prog->last_vertex_stage->nir->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
1167             break;
1168       }
1169    }
1170
1171    if (prog->shaders[MESA_SHADER_VERTEX]->precompile.dsl) {
1172       prog->base.dd.binding_usage |= BITFIELD_BIT(0);
1173       prog->base.dsl[prog->base.num_dsl] = prog->shaders[MESA_SHADER_VERTEX]->precompile.dsl;
1174       prog->base.num_dsl++;
1175    }
1176    if (prog->shaders[MESA_SHADER_FRAGMENT]->precompile.dsl) {
1177       prog->base.dd.binding_usage |= BITFIELD_BIT(1);
1178       prog->base.dsl[prog->base.num_dsl] = prog->shaders[MESA_SHADER_FRAGMENT]->precompile.dsl;
1179       /* guarantee a null dsl if vs doesn't have descriptors */
1180       prog->base.num_dsl = 2;
1181    }
1182    prog->base.dd.bindless = prog->shaders[MESA_SHADER_VERTEX]->bindless | prog->shaders[MESA_SHADER_FRAGMENT]->bindless;
1183    if (prog->base.dd.bindless) {
1184       prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
1185       prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
1186    }
1187    prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
1188
1189    VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
1190    prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
1191
1192    struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
1193    gkey->optimal_key = prog->last_variant_hash;
1194    assert(gkey->optimal_key);
1195    gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false);
1196    _mesa_set_add(&prog->libs->libs, gkey);
1197
1198    util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);
1199
1200    return prog;
1201 fail:
1202    if (prog)
1203       zink_destroy_gfx_program(screen, prog);
1204    return NULL;
1205 }
1206
1207 static uint32_t
1208 hash_compute_pipeline_state_local_size(const void *key)
1209 {
1210    const struct zink_compute_pipeline_state *state = key;
1211    uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1212    hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
1213    return hash;
1214 }
1215
1216 static uint32_t
1217 hash_compute_pipeline_state(const void *key)
1218 {
1219    const struct zink_compute_pipeline_state *state = key;
1220    return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1221 }
1222
1223 void
1224 zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3])
1225 {
1226    if (comp->use_local_size) {
1227       for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
1228          if (ctx->compute_pipeline_state.local_size[i] != block[i])
1229             ctx->compute_pipeline_state.dirty = true;
1230          ctx->compute_pipeline_state.local_size[i] = block[i];
1231       }
1232    }
1233 }
1234
1235 static bool
1236 equals_compute_pipeline_state(const void *a, const void *b)
1237 {
1238    const struct zink_compute_pipeline_state *sa = a;
1239    const struct zink_compute_pipeline_state *sb = b;
1240    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1241           sa->module == sb->module;
1242 }
1243
1244 static bool
1245 equals_compute_pipeline_state_local_size(const void *a, const void *b)
1246 {
1247    const struct zink_compute_pipeline_state *sa = a;
1248    const struct zink_compute_pipeline_state *sb = b;
1249    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1250           !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
1251           sa->module == sb->module;
1252 }
1253
1254 static void
1255 precompile_compute_job(void *data, void *gdata, int thread_index)
1256 {
1257    struct zink_compute_program *comp = data;
1258    struct zink_screen *screen = gdata;
1259
1260    comp->shader = zink_shader_create(screen, comp->nir, NULL);
1261    comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
1262    assert(comp->module);
1263    comp->module->shader = zink_shader_compile(screen, comp->shader, comp->shader->nir, NULL, NULL);
1264    assert(comp->module->shader);
1265    util_dynarray_init(&comp->shader_cache[0], comp);
1266    util_dynarray_init(&comp->shader_cache[1], comp);
1267
1268    struct blob blob = {0};
1269    blob_init(&blob);
1270    nir_serialize(&blob, comp->shader->nir, true);
1271
1272    struct mesa_sha1 sha1_ctx;
1273    _mesa_sha1_init(&sha1_ctx);
1274    _mesa_sha1_update(&sha1_ctx, blob.data, blob.size);
1275    _mesa_sha1_final(&sha1_ctx, comp->base.sha1);
1276    blob_finish(&blob);
1277
1278    zink_descriptor_program_init(comp->base.ctx, &comp->base);
1279
1280    zink_screen_get_pipeline_cache(screen, &comp->base, true);
1281    if (comp->base.can_precompile)
1282       comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
1283    if (comp->base_pipeline)
1284       zink_screen_update_pipeline_cache(screen, &comp->base, true);
1285 }
1286
1287 static struct zink_compute_program *
1288 create_compute_program(struct zink_context *ctx, nir_shader *nir)
1289 {
1290    struct zink_screen *screen = zink_screen(ctx->base.screen);
1291    struct zink_compute_program *comp = create_program(ctx, true);
1292    if (!comp)
1293       return NULL;
1294    comp->nir = nir;
1295
1296    comp->use_local_size = !(nir->info.workgroup_size[0] ||
1297                             nir->info.workgroup_size[1] ||
1298                             nir->info.workgroup_size[2]);
1299    comp->base.can_precompile = !comp->use_local_size &&
1300                                (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
1301                                (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
1302    _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
1303                                                        equals_compute_pipeline_state_local_size :
1304                                                        equals_compute_pipeline_state);
1305    util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
1306                       precompile_compute_job, NULL, 0);
1307    return comp;
1308 }
1309
1310 uint32_t
1311 zink_program_get_descriptor_usage(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type)
1312 {
1313    struct zink_shader *zs = NULL;
1314    switch (stage) {
1315    case MESA_SHADER_VERTEX:
1316    case MESA_SHADER_TESS_CTRL:
1317    case MESA_SHADER_TESS_EVAL:
1318    case MESA_SHADER_GEOMETRY:
1319    case MESA_SHADER_FRAGMENT:
1320       zs = ctx->gfx_stages[stage];
1321       break;
1322    case MESA_SHADER_COMPUTE: {
1323       zs = ctx->curr_compute->shader;
1324       break;
1325    }
1326    default:
1327       unreachable("unknown shader type");
1328    }
1329    if (!zs)
1330       return 0;
1331    switch (type) {
1332    case ZINK_DESCRIPTOR_TYPE_UBO:
1333       return zs->ubos_used;
1334    case ZINK_DESCRIPTOR_TYPE_SSBO:
1335       return zs->ssbos_used;
1336    case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
1337       return BITSET_TEST_RANGE(zs->nir->info.textures_used, 0, PIPE_MAX_SAMPLERS - 1);
1338    case ZINK_DESCRIPTOR_TYPE_IMAGE:
1339       return BITSET_TEST_RANGE(zs->nir->info.images_used, 0, PIPE_MAX_SAMPLERS - 1);
1340    default:
1341       unreachable("unknown descriptor type!");
1342    }
1343    return 0;
1344 }
1345
1346 bool
1347 zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
1348 {
1349    struct zink_shader *zs = NULL;
1350    switch (stage) {
1351    case MESA_SHADER_VERTEX:
1352    case MESA_SHADER_TESS_CTRL:
1353    case MESA_SHADER_TESS_EVAL:
1354    case MESA_SHADER_GEOMETRY:
1355    case MESA_SHADER_FRAGMENT:
1356       zs = ctx->gfx_stages[stage];
1357       break;
1358    case MESA_SHADER_COMPUTE: {
1359       zs = ctx->curr_compute->shader;
1360       break;
1361    }
1362    default:
1363       unreachable("unknown shader type");
1364    }
1365    if (!zs)
1366       return false;
1367    return zink_shader_descriptor_is_buffer(zs, type, i);
1368 }
1369
1370 static unsigned
1371 get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
1372 {
1373    switch (type) {
1374    case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
1375       return !!zs->has_uniforms;
1376    case ZINK_DESCRIPTOR_TYPE_UBO:
1377    case ZINK_DESCRIPTOR_TYPE_SSBO:
1378       return zs->num_bindings[type];
1379    default:
1380       break;
1381    }
1382    unsigned num_bindings = 0;
1383    for (int i = 0; i < zs->num_bindings[type]; i++)
1384       num_bindings += zs->bindings[type][i].size;
1385    return num_bindings;
1386 }
1387
1388 unsigned
1389 zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
1390 {
1391    unsigned num_bindings = 0;
1392    if (pg->is_compute) {
1393       struct zink_compute_program *comp = (void*)pg;
1394       return get_num_bindings(comp->shader, type);
1395    }
1396    struct zink_gfx_program *prog = (void*)pg;
1397    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1398       if (prog->shaders[i])
1399          num_bindings += get_num_bindings(prog->shaders[i], type);
1400    }
1401    return num_bindings;
1402 }
1403
1404 unsigned
1405 zink_program_num_bindings(const struct zink_program *pg)
1406 {
1407    unsigned num_bindings = 0;
1408    for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
1409       num_bindings += zink_program_num_bindings_typed(pg, i);
1410    return num_bindings;
1411 }
1412
1413 static void
1414 deinit_program(struct zink_screen *screen, struct zink_program *pg)
1415 {
1416    util_queue_fence_wait(&pg->cache_fence);
1417    if (pg->layout)
1418       VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);
1419
1420    if (pg->pipeline_cache)
1421       VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
1422    zink_descriptor_program_deinit(screen, pg);
1423 }
1424
1425 void
1426 zink_destroy_gfx_program(struct zink_screen *screen,
1427                          struct zink_gfx_program *prog)
1428 {
1429    unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
1430    if (screen->info.have_EXT_extended_dynamic_state) {
1431       /* only need first 3/4 for point/line/tri/patch */
1432       if ((prog->stages_present &
1433           (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
1434           BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
1435          max_idx = 4;
1436       else
1437          max_idx = 3;
1438       max_idx++;
1439    }
1440
1441    if (prog->is_separable)
1442       zink_gfx_program_reference(screen, &prog->full_prog, NULL);
1443    for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
1444       for (int i = 0; i < max_idx; ++i) {
1445          hash_table_foreach(&prog->pipelines[r][i], entry) {
1446             struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
1447
1448             util_queue_fence_wait(&pc_entry->fence);
1449             VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1450             VKSCR(DestroyPipeline)(screen->dev, pc_entry->unoptimized_pipeline, NULL);
1451             free(pc_entry);
1452          }
1453       }
1454    }
1455
1456    deinit_program(screen, &prog->base);
1457
1458    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1459       if (prog->shaders[i]) {
1460          _mesa_set_remove_key(prog->shaders[i]->programs, prog);
1461          prog->shaders[i] = NULL;
1462       }
1463       if (!prog->is_separable) {
1464          destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
1465          destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
1466          destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
1467          destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
1468          ralloc_free(prog->nir[i]);
1469       }
1470    }
1471    if (prog->is_separable)
1472       zink_gfx_lib_cache_unref(screen, prog->libs);
1473
1474    ralloc_free(prog);
1475 }
1476
1477 void
1478 zink_destroy_compute_program(struct zink_screen *screen,
1479                              struct zink_compute_program *comp)
1480 {
1481    deinit_program(screen, &comp->base);
1482
1483    assert(comp->shader);
1484    assert(!comp->shader->spirv);
1485
1486    _mesa_set_destroy(comp->shader->programs, NULL);
1487    ralloc_free(comp->shader->nir);
1488    ralloc_free(comp->shader);
1489
1490    destroy_shader_cache(screen, &comp->shader_cache[0]);
1491    destroy_shader_cache(screen, &comp->shader_cache[1]);
1492
1493    hash_table_foreach(&comp->pipelines, entry) {
1494       struct compute_pipeline_cache_entry *pc_entry = entry->data;
1495
1496       VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1497       free(pc_entry);
1498    }
1499    VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
1500    VKSCR(DestroyShaderModule)(screen->dev, comp->module->shader, NULL);
1501    free(comp->module);
1502
1503    ralloc_free(comp);
1504 }
1505
1506 ALWAYS_INLINE static bool
1507 compute_can_shortcut(const struct zink_compute_program *comp)
1508 {
1509    return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
1510 }
1511
1512 VkPipeline
1513 zink_get_compute_pipeline(struct zink_screen *screen,
1514                       struct zink_compute_program *comp,
1515                       struct zink_compute_pipeline_state *state)
1516 {
1517    struct hash_entry *entry = NULL;
1518
1519    if (!state->dirty && !state->module_changed)
1520       return state->pipeline;
1521    if (state->dirty) {
1522       if (state->pipeline) //avoid on first hash
1523          state->final_hash ^= state->hash;
1524       if (comp->use_local_size)
1525          state->hash = hash_compute_pipeline_state_local_size(state);
1526       else
1527          state->hash = hash_compute_pipeline_state(state);
1528       state->dirty = false;
1529       state->final_hash ^= state->hash;
1530    }
1531
1532    util_queue_fence_wait(&comp->base.cache_fence);
1533    if (comp->base_pipeline && compute_can_shortcut(comp)) {
1534       state->pipeline = comp->base_pipeline;
1535       return state->pipeline;
1536    }
1537    entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
1538
1539    if (!entry) {
1540       VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);
1541
1542       if (pipeline == VK_NULL_HANDLE)
1543          return VK_NULL_HANDLE;
1544
1545       zink_screen_update_pipeline_cache(screen, &comp->base, false);
1546       if (compute_can_shortcut(comp)) {
1547          /* don't add base pipeline to cache */
1548          state->pipeline = comp->base_pipeline = pipeline;
1549          return state->pipeline;
1550       }
1551
1552       struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
1553       if (!pc_entry)
1554          return VK_NULL_HANDLE;
1555
1556       memcpy(&pc_entry->state, state, sizeof(*state));
1557       pc_entry->pipeline = pipeline;
1558
1559       entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
1560       assert(entry);
1561    }
1562
1563    struct compute_pipeline_cache_entry *cache_entry = entry->data;
1564    state->pipeline = cache_entry->pipeline;
1565    return state->pipeline;
1566 }
1567
1568 static void
1569 bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader);
1570
1571 static void
1572 unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
1573 {
1574    for (int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
1575      if (ctx->gfx_stages[stage]->non_fs.generated_gs[i] &&
1576          ctx->gfx_stages[MESA_SHADER_GEOMETRY] ==
1577          ctx->gfx_stages[stage]->non_fs.generated_gs[i]) {
1578         assert(stage != MESA_SHADER_GEOMETRY); /* let's not keep recursing! */
1579         bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
1580      }
1581    }
1582 }
1583
1584 static void
1585 bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
1586 {
1587    if (shader && shader->nir->info.num_inlinable_uniforms)
1588       ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
1589    else
1590       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);
1591
1592    if (ctx->gfx_stages[stage]) {
1593       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1594
1595       if (stage != MESA_SHADER_FRAGMENT)
1596          unbind_generated_gs(ctx, stage, shader);
1597    }
1598
1599    ctx->gfx_stages[stage] = shader;
1600    ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
1601    ctx->gfx_pipeline_state.modules_changed = true;
1602    if (shader) {
1603       ctx->shader_stages |= BITFIELD_BIT(stage);
1604       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1605    } else {
1606       ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
1607       if (ctx->curr_program)
1608          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
1609       ctx->curr_program = NULL;
1610       ctx->shader_stages &= ~BITFIELD_BIT(stage);
1611    }
1612 }
1613
1614 static enum pipe_prim_type
1615 gs_output_to_reduced_prim_type(struct shader_info *info)
1616 {
1617    switch (info->gs.output_primitive) {
1618    case SHADER_PRIM_POINTS:
1619       return PIPE_PRIM_POINTS;
1620
1621    case SHADER_PRIM_LINES:
1622    case SHADER_PRIM_LINE_LOOP:
1623    case SHADER_PRIM_LINE_STRIP:
1624    case SHADER_PRIM_LINES_ADJACENCY:
1625    case SHADER_PRIM_LINE_STRIP_ADJACENCY:
1626       return PIPE_PRIM_LINES;
1627
1628    case SHADER_PRIM_TRIANGLES:
1629    case SHADER_PRIM_TRIANGLE_STRIP:
1630    case SHADER_PRIM_TRIANGLE_FAN:
1631    case SHADER_PRIM_TRIANGLES_ADJACENCY:
1632    case SHADER_PRIM_TRIANGLE_STRIP_ADJACENCY:
1633       return PIPE_PRIM_TRIANGLES;
1634
1635    default:
1636       unreachable("unexpected output primitive type");
1637    }
1638 }
1639
1640 static enum pipe_prim_type
1641 update_rast_prim(struct zink_shader *shader)
1642 {
1643    struct shader_info *info = &shader->nir->info;
1644    if (info->stage == MESA_SHADER_GEOMETRY)
1645       return gs_output_to_reduced_prim_type(info);
1646    else if (info->stage == MESA_SHADER_TESS_EVAL) {
1647       if (info->tess.point_mode)
1648          return PIPE_PRIM_POINTS;
1649       else {
1650          switch (info->tess._primitive_mode) {
1651          case TESS_PRIMITIVE_ISOLINES:
1652             return PIPE_PRIM_LINES;
1653          case TESS_PRIMITIVE_TRIANGLES:
1654          case TESS_PRIMITIVE_QUADS:
1655             return PIPE_PRIM_TRIANGLES;
1656          default:
1657             return PIPE_PRIM_MAX;
1658          }
1659       }
1660    }
1661    return PIPE_PRIM_MAX;
1662 }
1663
1664 static void
1665 bind_last_vertex_stage(struct zink_context *ctx)
1666 {
1667    gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->nir->info.stage : MESA_SHADER_STAGES;
1668    if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1669       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
1670    else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1671       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
1672    else
1673       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
1674    gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->nir->info.stage : MESA_SHADER_VERTEX;
1675
1676    /* update rast_prim */
1677    ctx->gfx_pipeline_state.shader_rast_prim =
1678       ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
1679                                PIPE_PRIM_MAX;
1680
1681    if (old != current) {
1682       if (!zink_screen(ctx->base.screen)->optimal_keys) {
1683          if (old != MESA_SHADER_STAGES) {
1684             memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1685             ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
1686          } else {
1687             /* always unset vertex shader values when changing to a non-vs last stage */
1688             memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1689          }
1690       }
1691
1692       unsigned num_viewports = ctx->vp_state.num_viewports;
1693       struct zink_screen *screen = zink_screen(ctx->base.screen);
1694       /* number of enabled viewports is based on whether last vertex stage writes viewport index */
1695       if (ctx->last_vertex_stage) {
1696          if (ctx->last_vertex_stage->nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
1697             ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
1698          else
1699             ctx->vp_state.num_viewports = 1;
1700       } else {
1701          ctx->vp_state.num_viewports = 1;
1702       }
1703       ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
1704       if (!screen->info.have_EXT_extended_dynamic_state) {
1705          if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
1706             ctx->gfx_pipeline_state.dirty = true;
1707          ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
1708       }
1709       ctx->last_vertex_stage_dirty = true;
1710    }
1711 }
1712
1713 static void
1714 zink_bind_vs_state(struct pipe_context *pctx,
1715                    void *cso)
1716 {
1717    struct zink_context *ctx = zink_context(pctx);
1718    if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
1719       return;
1720    bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
1721    bind_last_vertex_stage(ctx);
1722    if (cso) {
1723       struct zink_shader *zs = cso;
1724       ctx->shader_reads_drawid = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1725       ctx->shader_reads_basevertex = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
1726    } else {
1727       ctx->shader_reads_drawid = false;
1728       ctx->shader_reads_basevertex = false;
1729    }
1730 }
1731
1732 /* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
1733  * in GL, samples==1 means ignore gl_SampleMask[]
1734  * in VK, gl_SampleMask[] is never ignored
1735  */
1736 void
1737 zink_update_fs_key_samples(struct zink_context *ctx)
1738 {
1739    if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1740       return;
1741    nir_shader *nir = ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir;
1742    if (nir->info.outputs_written & (1 << FRAG_RESULT_SAMPLE_MASK)) {
1743       bool samples = zink_get_fs_base_key(ctx)->samples;
1744       if (samples != (ctx->fb_state.samples > 1))
1745          zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
1746    }
1747 }
1748
1749 void zink_update_gs_key_rectangular_line(struct zink_context *ctx)
1750 {
1751    bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular;
1752    if (line_rectangular != ctx->rast_state->base.line_rectangular)
1753       zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular;
1754 }
1755
1756 static void
1757 zink_bind_fs_state(struct pipe_context *pctx,
1758                    void *cso)
1759 {
1760    struct zink_context *ctx = zink_context(pctx);
1761    if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1762       return;
1763    unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
1764    bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
1765    ctx->fbfetch_outputs = 0;
1766    if (cso) {
1767       nir_shader *nir = ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir;
1768       if (nir->info.fs.uses_fbfetch_output) {
1769          nir_foreach_shader_out_variable(var, ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir) {
1770             if (var->data.fb_fetch_output)
1771                ctx->fbfetch_outputs |= BITFIELD_BIT(var->data.location - FRAG_RESULT_DATA0);
1772          }
1773       }
1774       zink_update_fs_key_samples(ctx);
1775       if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
1776          if (ctx->gfx_pipeline_state.rast_attachment_order != nir->info.fs.uses_fbfetch_output)
1777             ctx->gfx_pipeline_state.dirty = true;
1778          ctx->gfx_pipeline_state.rast_attachment_order = nir->info.fs.uses_fbfetch_output;
1779       }
1780       zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false);
1781       if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask &&
1782           !zink_screen(pctx->screen)->driver_workarounds.needs_zs_shader_swizzle)
1783          zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
1784       if (!zink_screen(ctx->base.screen)->driver_workarounds.track_renderpasses && !ctx->blitting)
1785          zink_parse_tc_info(ctx);
1786    }
1787    zink_update_fbfetch(ctx);
1788 }
1789
1790 static void
1791 zink_bind_gs_state(struct pipe_context *pctx,
1792                    void *cso)
1793 {
1794    struct zink_context *ctx = zink_context(pctx);
1795    if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1796       return;
1797    bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
1798    bind_last_vertex_stage(ctx);
1799 }
1800
1801 static void
1802 zink_bind_tcs_state(struct pipe_context *pctx,
1803                    void *cso)
1804 {
1805    bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
1806 }
1807
1808 static void
1809 zink_bind_tes_state(struct pipe_context *pctx,
1810                    void *cso)
1811 {
1812    struct zink_context *ctx = zink_context(pctx);
1813    if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1814       return;
1815    if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
1816       if (!cso) {
1817          /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */
1818          if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
1819             ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
1820       }
1821    }
1822    bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
1823    bind_last_vertex_stage(ctx);
1824 }
1825
1826 static void *
1827 zink_create_cs_state(struct pipe_context *pctx,
1828                      const struct pipe_compute_state *shader)
1829 {
1830    struct nir_shader *nir;
1831    if (shader->ir_type != PIPE_SHADER_IR_NIR)
1832       nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
1833    else
1834       nir = (struct nir_shader *)shader->prog;
1835
1836    if (nir->info.uses_bindless)
1837       zink_descriptors_init_bindless(zink_context(pctx));
1838
1839    return create_compute_program(zink_context(pctx), nir);
1840 }
1841
1842 static void
1843 zink_bind_cs_state(struct pipe_context *pctx,
1844                    void *cso)
1845 {
1846    struct zink_context *ctx = zink_context(pctx);
1847    struct zink_compute_program *comp = cso;
1848    if (comp && comp->nir->info.num_inlinable_uniforms)
1849       ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
1850    else
1851       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);
1852
1853    if (ctx->curr_compute) {
1854       zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
1855       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
1856       ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
1857       ctx->compute_pipeline_state.module_hash = 0;
1858    }
1859    ctx->compute_pipeline_state.dirty = true;
1860    ctx->curr_compute = comp;
1861    if (comp && comp != ctx->curr_compute) {
1862       ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
1863       if (util_queue_fence_is_signalled(&comp->base.cache_fence))
1864          ctx->compute_pipeline_state.module = ctx->curr_compute->curr->shader;
1865       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
1866       if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
1867          ctx->compute_dirty = true;
1868    }
1869    zink_select_launch_grid(ctx);
1870 }
1871
1872 static void
1873 zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
1874 {
1875    struct zink_compute_program *comp = cso;
1876    zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
1877 }
1878
1879 void
1880 zink_delete_shader_state(struct pipe_context *pctx, void *cso)
1881 {
1882    zink_shader_free(zink_screen(pctx->screen), cso);
1883 }
1884
1885 void *
1886 zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
1887 {
1888    nir_shader *nir;
1889    if (shader->type != PIPE_SHADER_IR_NIR)
1890       nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
1891    else
1892       nir = (struct nir_shader *)shader->ir.nir;
1893
1894    if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
1895       zink_descriptor_util_init_fbfetch(zink_context(pctx));
1896    if (nir->info.uses_bindless)
1897       zink_descriptors_init_bindless(zink_context(pctx));
1898
1899    return zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output);
1900 }
1901
1902 static void
1903 zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
1904 {
1905    struct zink_screen *screen = zink_screen(pctx->screen);
1906    util_shader_reference(pctx, &screen->shaders, &cso, NULL);
1907 }
1908
1909 static void *
1910 zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
1911 {
1912    bool cache_hit;
1913    struct zink_screen *screen = zink_screen(pctx->screen);
1914    return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
1915 }
1916
1917 /* caller must lock prog->libs->lock */
1918 struct zink_gfx_library_key *
1919 zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
1920 {
1921    struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
1922    gkey->optimal_key = state->optimal_key;
1923    assert(gkey->optimal_key);
1924    memcpy(gkey->modules, prog->modules, sizeof(gkey->modules));
1925    gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
1926    _mesa_set_add(&prog->libs->libs, gkey);
1927    return gkey;
1928 }
1929
1930 static const char *
1931 print_exe_stages(VkShaderStageFlags stages)
1932 {
1933    if (stages == VK_SHADER_STAGE_VERTEX_BIT)
1934       return "VS";
1935    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
1936       return "VS+GS";
1937    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
1938       return "VS+TCS+TES";
1939    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
1940       return "VS+TCS+TES+GS";
1941    if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1942       return "TCS";
1943    if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1944       return "TES";
1945    if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
1946       return "GS";
1947    if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
1948       return "FS";
1949    if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
1950       return "CS";
1951    unreachable("unhandled combination of stages!");
1952 }
1953
1954 static void
1955 print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline)
1956 {
1957    VkPipelineInfoKHR pinfo = {
1958      VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
1959      NULL,
1960      pipeline
1961    };
1962    unsigned exe_count = 0;
1963    VkPipelineExecutablePropertiesKHR props[10] = {0};
1964    for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
1965       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
1966       props[i].pNext = NULL;
1967    }
1968    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
1969    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
1970    printf("PIPELINE STATISTICS:");
1971    for (unsigned e = 0; e < exe_count; e++) {
1972       VkPipelineExecutableInfoKHR info = {
1973          VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
1974          NULL,
1975          pipeline,
1976          e
1977       };
1978       unsigned count = 0;
1979       printf("\n\t%s (%s): ", print_exe_stages(props[e].stages), props[e].name);
1980       VkPipelineExecutableStatisticKHR *stats = NULL;
1981       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
1982       stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
1983       for (unsigned i = 0; i < count; i++)
1984          stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
1985       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);
1986
1987       for (unsigned i = 0; i < count; i++) {
1988          if (i)
1989             printf(", ");
1990          switch (stats[i].format) {
1991          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
1992             printf("%s: %u", stats[i].name, stats[i].value.b32);
1993             break;
1994          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
1995             printf("%s: %" PRIi64, stats[i].name, stats[i].value.i64);
1996             break;
1997          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
1998             printf("%s: %" PRIu64, stats[i].name, stats[i].value.u64);
1999             break;
2000          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
2001             printf("%s: %g", stats[i].name, stats[i].value.f64);
2002             break;
2003          default:
2004             unreachable("unknown statistic");
2005          }
2006       }
2007    }
2008    printf("\n");
2009 }
2010
2011 static void
2012 precompile_job(void *data, void *gdata, int thread_index)
2013 {
2014    struct zink_screen *screen = gdata;
2015    struct zink_gfx_program *prog = data;
2016
2017    struct zink_gfx_pipeline_state state = {0};
2018    state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
2019    state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
2020    state.optimal_key = state.shader_keys_optimal.key.val;
2021    generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
2022    zink_screen_get_pipeline_cache(screen, &prog->base, true);
2023    simple_mtx_lock(&prog->libs->lock);
2024    zink_create_pipeline_lib(screen, prog, &state);
2025    simple_mtx_unlock(&prog->libs->lock);
2026    zink_screen_update_pipeline_cache(screen, &prog->base, true);
2027 }
2028
2029 static void
2030 precompile_separate_shader_job(void *data, void *gdata, int thread_index)
2031 {
2032    struct zink_screen *screen = gdata;
2033    struct zink_shader *zs = data;
2034
2035    zs->precompile.mod = zink_shader_compile_separate(screen, zs);
2036    zink_descriptor_shader_init(screen, zs);
2037    VkShaderModule mods[ZINK_GFX_SHADER_COUNT] = {0};
2038    mods[zs->nir->info.stage] = zs->precompile.mod;
2039    zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, mods, zs->precompile.layout);
2040 }
2041
2042 static void
2043 zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
2044 {
2045    struct zink_context *ctx = zink_context(pctx);
2046    struct zink_shader **zshaders = (struct zink_shader **)shaders;
2047    if (shaders[MESA_SHADER_COMPUTE])
2048       return;
2049    /* can't precompile fixedfunc */
2050    if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
2051       if (shaders[MESA_SHADER_VERTEX] || shaders[MESA_SHADER_FRAGMENT]) {
2052          struct zink_shader *zs = shaders[MESA_SHADER_VERTEX] ? shaders[MESA_SHADER_VERTEX] : shaders[MESA_SHADER_FRAGMENT];
2053          if (zs->nir->info.separate_shader && !zs->precompile.mod && util_queue_fence_is_signalled(&zs->precompile.fence) &&
2054              zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
2055              /* sample shading can't precompile */
2056              (!shaders[MESA_SHADER_FRAGMENT] || !zs->nir->info.fs.uses_sample_shading))
2057             util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, zs, &zs->precompile.fence, precompile_separate_shader_job, NULL, 0);
2058       }
2059       return;
2060    }
2061    unsigned hash = 0;
2062    unsigned shader_stages = 0;
2063    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
2064       if (zshaders[i]) {
2065          hash ^= zshaders[i]->hash;
2066          shader_stages |= BITFIELD_BIT(i);
2067       }
2068    }
2069    unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
2070    unsigned tess = shader_stages & tess_stages;
2071    /* can't do fixedfunc tes either */
2072    if (tess && !shaders[MESA_SHADER_TESS_EVAL])
2073       return;
2074    struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
2075    simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2076    /* link can be called repeatedly with the same shaders: ignore */
2077    if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
2078       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2079       return;
2080    }
2081    struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3, hash);
2082    u_foreach_bit(i, shader_stages)
2083       assert(prog->shaders[i]);
2084    _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
2085    prog->base.removed = false;
2086    simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2087    if (zink_debug & ZINK_DEBUG_SHADERDB) {
2088       struct zink_screen *screen = zink_screen(pctx->screen);
2089       if (screen->optimal_keys)
2090          generate_gfx_program_modules_optimal(ctx, screen,  prog, &ctx->gfx_pipeline_state);
2091       else
2092          generate_gfx_program_modules(ctx, screen,  prog, &ctx->gfx_pipeline_state);
2093       VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map,  shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
2094       print_pipeline_stats(screen, pipeline);
2095    } else {
2096       util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
2097    }
2098 }
2099
2100 void
2101 zink_program_init(struct zink_context *ctx)
2102 {
2103    ctx->base.create_vs_state = zink_create_cached_shader_state;
2104    ctx->base.bind_vs_state = zink_bind_vs_state;
2105    ctx->base.delete_vs_state = zink_delete_cached_shader_state;
2106
2107    ctx->base.create_fs_state = zink_create_cached_shader_state;
2108    ctx->base.bind_fs_state = zink_bind_fs_state;
2109    ctx->base.delete_fs_state = zink_delete_cached_shader_state;
2110
2111    ctx->base.create_gs_state = zink_create_cached_shader_state;
2112    ctx->base.bind_gs_state = zink_bind_gs_state;
2113    ctx->base.delete_gs_state = zink_delete_cached_shader_state;
2114
2115    ctx->base.create_tcs_state = zink_create_cached_shader_state;
2116    ctx->base.bind_tcs_state = zink_bind_tcs_state;
2117    ctx->base.delete_tcs_state = zink_delete_cached_shader_state;
2118
2119    ctx->base.create_tes_state = zink_create_cached_shader_state;
2120    ctx->base.bind_tes_state = zink_bind_tes_state;
2121    ctx->base.delete_tes_state = zink_delete_cached_shader_state;
2122
2123    ctx->base.create_compute_state = zink_create_cs_state;
2124    ctx->base.bind_compute_state = zink_bind_cs_state;
2125    ctx->base.delete_compute_state = zink_delete_cs_shader_state;
2126
2127    if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
2128       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
2129    else
2130       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
2131    if (zink_screen(ctx->base.screen)->have_full_ds3)
2132       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
2133    else
2134       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
2135    /* validate struct packing */
2136    STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
2137    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
2138                  offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
2139    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
2140                  offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
2141    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
2142                  offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));
2143
2144    STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
2145
2146    if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library || zink_debug & ZINK_DEBUG_SHADERDB)
2147       ctx->base.link_shader = zink_link_gfx_shader;
2148 }
2149
2150 bool
2151 zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
2152 {
2153    bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
2154    bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
2155    ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
2156    if (!changed)
2157       return false;
2158    if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
2159       ctx->gfx_pipeline_state.dirty |= true;
2160    ctx->rasterizer_discard_changed = true;
2161    return true;
2162 }
2163
2164 void
2165 zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
2166                            struct util_queue_fence *fence,
2167                            pipe_driver_thread_func execute,
2168                            pipe_driver_thread_func cleanup,
2169                            const size_t job_size)
2170 {
2171    struct zink_screen *screen = zink_screen(pscreen);
2172    util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
2173 }
2174
2175 static bool
2176 has_edge_flags(struct zink_context *ctx)
2177 {
2178    switch(ctx->gfx_pipeline_state.gfx_prim_mode) {
2179    case PIPE_PRIM_POINTS:
2180    case PIPE_PRIM_LINE_STRIP:
2181    case PIPE_PRIM_LINE_STRIP_ADJACENCY:
2182    case PIPE_PRIM_LINES:
2183    case PIPE_PRIM_LINE_LOOP:
2184    case PIPE_PRIM_LINES_ADJACENCY:
2185    case PIPE_PRIM_TRIANGLE_STRIP:
2186    case PIPE_PRIM_TRIANGLE_FAN:
2187    case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
2188    case PIPE_PRIM_QUAD_STRIP:
2189       return false;
2190    case PIPE_PRIM_TRIANGLES:
2191    case PIPE_PRIM_TRIANGLES_ADJACENCY:
2192    case PIPE_PRIM_QUADS:
2193    case PIPE_PRIM_POLYGON:
2194    case PIPE_PRIM_PATCHES:
2195    case PIPE_PRIM_MAX:
2196    default:
2197       break;
2198    }
2199    return ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_LINES &&
2200           ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags;
2201 }
2202
2203 void
2204 zink_set_primitive_emulation_keys(struct zink_context *ctx)
2205 {
2206    struct zink_screen *screen = zink_screen(ctx->base.screen);
2207    bool lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_LINES &&
2208                              screen->driver_workarounds.no_linestipple &&
2209                              ctx->rast_state->base.line_stipple_enable &&
2210                              !ctx->num_so_targets;
2211
2212    bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_POINTS &&
2213                              screen->driconf.emulate_point_smooth &&
2214                              ctx->rast_state->base.point_smooth;
2215
2216    if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
2217       assert(zink_get_gs_key(ctx)->lower_line_stipple ==
2218              zink_get_fs_key(ctx)->lower_line_stipple);
2219       zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
2220       zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
2221    }
2222
2223    bool lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == PIPE_PRIM_LINES &&
2224                             screen->driver_workarounds.no_linesmooth &&
2225                             ctx->rast_state->base.line_smooth &&
2226                             !ctx->num_so_targets;
2227
2228    bool lower_edge_flags = has_edge_flags(ctx);
2229
2230    if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
2231       assert(zink_get_gs_key(ctx)->lower_line_smooth ==
2232              zink_get_fs_key(ctx)->lower_line_smooth);
2233       zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
2234       zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
2235    }
2236
2237    if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) {
2238       zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth;
2239    }
2240
2241    if (lower_line_stipple || lower_line_smooth || lower_edge_flags ||
2242        zink_get_gs_key(ctx)->lower_gl_point) {
2243       enum pipe_shader_type prev_vertex_stage =
2244          ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
2245             MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2246
2247       if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] ||
2248           (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->nir->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {
2249          assert(!screen->optimal_keys);
2250
2251          if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode]) {
2252             nir_shader *nir = nir_create_passthrough_gs(
2253                &screen->nir_options,
2254                ctx->gfx_stages[prev_vertex_stage]->nir,
2255                ctx->gfx_pipeline_state.gfx_prim_mode,
2256                lower_edge_flags,
2257                false);
2258
2259             struct zink_shader *shader = zink_shader_create(screen, nir, NULL);
2260             ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode] = shader;
2261             shader->non_fs.is_generated = true;
2262          }
2263
2264          bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY,
2265                         ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode]);
2266       }
2267    } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
2268               ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
2269          bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
2270 }
2271
2272 void
2273 zink_create_primitive_emulation_gs(struct zink_context *ctx)
2274 {
2275    struct zink_screen *screen = zink_screen(ctx->base.screen);
2276    bool lower_edge_flags = has_edge_flags(ctx);
2277
2278    if (lower_edge_flags) {
2279       enum pipe_shader_type prev_vertex_stage =
2280          ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
2281             MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2282
2283       if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] ||
2284           (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->nir->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {
2285
2286          if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode]) {
2287             nir_shader *nir = nir_create_passthrough_gs(
2288                &screen->nir_options,
2289                ctx->gfx_stages[prev_vertex_stage]->nir,
2290                ctx->gfx_pipeline_state.gfx_prim_mode,
2291                lower_edge_flags,
2292                false);
2293
2294             struct zink_shader *shader = zink_shader_create(screen, nir, NULL);
2295             ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode] = shader;
2296             shader->non_fs.is_generated = true;
2297          }
2298
2299          bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY,
2300                         ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode]);
2301       }
2302    } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
2303               ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
2304          bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
2305 }