src/gallium/drivers/zink/zink_program.c

   1 /*
   2  * Copyright 2018 Collabora Ltd.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "zink_program.h"
  25
  26 #include "zink_compiler.h"
  27 #include "zink_context.h"
  28 #include "zink_descriptors.h"
  29 #include "zink_helpers.h"
  30 #include "zink_pipeline.h"
  31 #include "zink_render_pass.h"
  32 #include "zink_resource.h"
  33 #include "zink_screen.h"
  34 #include "zink_state.h"
  35 #include "zink_inlines.h"
  36
  37 #include "util/u_debug.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_prim.h"
  40 #include "nir_serialize.h"
  41
  42 /* for pipeline cache */
  43 #define XXH_INLINE_ALL
  44 #include "util/xxhash.h"
  45
  46 void
  47 debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
  48 {
  49    sprintf(buf, "zink_gfx_program");
  50 }
  51
  52 void
  53 debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
  54 {
  55    sprintf(buf, "zink_compute_program");
  56 }
  57
  58 ALWAYS_INLINE static bool
  59 shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
  60 {
  61    if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask)
  62       return false;
  63    const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
  64    return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
  65           (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
  66                                     key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
  67 }
  68
  69 ALWAYS_INLINE static bool
  70 shader_key_matches(const struct zink_shader_module *zm,
  71                    const struct zink_shader_key *key, unsigned num_uniforms,
  72                    bool has_inline, bool has_nonseamless)
  73 {
  74    const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
  75    if (has_inline) {
  76       if (zm->num_uniforms != num_uniforms ||
  77           (num_uniforms &&
  78            memcmp(zm->key + zm->key_size + nonseamless_size,
  79                   key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
  80          return false;
  81    }
  82    if (!has_nonseamless) {
  83       if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
  84           (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
  85          return false;
  86    }
  87    return !memcmp(zm->key, key, zm->key_size);
  88 }
  89
  90 static uint32_t
  91 shader_module_hash(const struct zink_shader_module *zm)
  92 {
  93    const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
  94    unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
  95    return _mesa_hash_data(zm->key, key_size);
  96 }
  97
  98 ALWAYS_INLINE static void
  99 gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
 100                           struct zink_shader *zs, struct zink_gfx_program *prog,
 101                           struct zink_gfx_pipeline_state *state,
 102                           bool has_inline, //is inlining enabled?
 103                           bool has_nonseamless, //is nonseamless ext present?
 104                           unsigned *inline_size, unsigned *nonseamless_size)
 105 {
 106    gl_shader_stage stage = zs->nir->info.stage;
 107    struct zink_shader_key *key = &state->shader_keys.key[stage];
 108    if (has_inline && ctx && zs->nir->info.num_inlinable_uniforms &&
 109        ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
 110       if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
 111          *inline_size = zs->nir->info.num_inlinable_uniforms;
 112       else
 113          key->inline_uniforms = false;
 114    }
 115    if (!has_nonseamless && key->base.nonseamless_cube_mask)
 116       *nonseamless_size = sizeof(uint32_t);
 117 }
 118
 119 ALWAYS_INLINE static struct zink_shader_module *
 120 create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
 121                                struct zink_shader *zs, struct zink_gfx_program *prog,
 122                                gl_shader_stage stage,
 123                                struct zink_gfx_pipeline_state *state,
 124                                unsigned inline_size, unsigned nonseamless_size,
 125                                bool has_inline, //is inlining enabled?
 126                                bool has_nonseamless) //is nonseamless ext present?
 127 {
 128    VkShaderModule mod;
 129    struct zink_shader_module *zm;
 130    const struct zink_shader_key *key = &state->shader_keys.key[stage];
 131    /* non-generated tcs won't use the shader key */
 132    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->tcs.is_generated;
 133    zm = malloc(sizeof(struct zink_shader_module) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t));
 134    if (!zm) {
 135       return NULL;
 136    }
 137    unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL ].key.tcs.patch_vertices;
 138    if (stage == MESA_SHADER_TESS_CTRL && zs->tcs.is_generated && zs->spirv) {
 139       assert(ctx); //TODO async
 140       mod = zink_shader_tcs_compile(screen, zs, patch_vertices);
 141    } else {
 142       mod = zink_shader_compile(screen, zs, prog->nir[stage], key);
 143    }
 144    if (!mod) {
 145       FREE(zm);
 146       return NULL;
 147    }
 148    zm->shader = mod;
 149    zm->num_uniforms = inline_size;
 150    if (!is_nongenerated_tcs) {
 151       zm->key_size = key->size;
 152       memcpy(zm->key, key, key->size);
 153    } else {
 154       zm->key_size = 0;
 155       memset(zm->key, 0, key->size);
 156    }
 157    if (!has_nonseamless && nonseamless_size) {
 158       /* nonseamless mask gets added to base key if it exists */
 159       memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
 160    }
 161    zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
 162    if (inline_size)
 163       memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
 164    if (stage == MESA_SHADER_TESS_CTRL && zs->tcs.is_generated)
 165       zm->hash = patch_vertices;
 166    else
 167       zm->hash = shader_module_hash(zm);
 168    zm->default_variant = !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
 169    if (inline_size)
 170       prog->inlined_variant_count[stage]++;
 171    util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
 172    return zm;
 173 }
 174
 175 ALWAYS_INLINE static struct zink_shader_module *
 176 get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
 177                             struct zink_shader *zs, struct zink_gfx_program *prog,
 178                             gl_shader_stage stage,
 179                             struct zink_gfx_pipeline_state *state,
 180                             unsigned inline_size, unsigned nonseamless_size,
 181                             bool has_inline, //is inlining enabled?
 182                             bool has_nonseamless) //is nonseamless ext present?
 183 {
 184    const struct zink_shader_key *key = &state->shader_keys.key[stage];
 185    /* non-generated tcs won't use the shader key */
 186    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->tcs.is_generated;
 187
 188    struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
 189    unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 190    struct zink_shader_module **pzm = shader_cache->data;
 191    for (unsigned i = 0; i < count; i++) {
 192       struct zink_shader_module *iter = pzm[i];
 193       if (is_nongenerated_tcs) {
 194          if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
 195             continue;
 196       } else {
 197          if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
 198             continue;
 199          if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
 200             continue;
 201       }
 202       if (i > 0) {
 203          struct zink_shader_module *zero = pzm[0];
 204          pzm[0] = iter;
 205          pzm[i] = zero;
 206       }
 207       return iter;
 208    }
 209
 210    return NULL;
 211 }
 212
 213 ALWAYS_INLINE static struct zink_shader_module *
 214 create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
 215                                        struct zink_shader *zs, struct zink_gfx_program *prog,
 216                                        gl_shader_stage stage,
 217                                        struct zink_gfx_pipeline_state *state)
 218 {
 219    VkShaderModule mod;
 220    struct zink_shader_module *zm;
 221    uint16_t *key;
 222    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
 223    if (zs == prog->last_vertex_stage) {
 224       key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
 225    } else if (stage == MESA_SHADER_FRAGMENT) {
 226       key = (uint16_t*)&state->shader_keys_optimal.key.fs;
 227    } else if (stage == MESA_SHADER_TESS_CTRL && zs->tcs.is_generated) {
 228       key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
 229    } else {
 230       key = NULL;
 231    }
 232    size_t key_size = sizeof(uint16_t);
 233    zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0));
 234    if (!zm) {
 235       return NULL;
 236    }
 237    if (stage == MESA_SHADER_TESS_CTRL && zs->tcs.is_generated && zs->spirv) {
 238       assert(ctx); //TODO async
 239       struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
 240       mod = zink_shader_tcs_compile(screen, zs, tcs->patch_vertices);
 241    } else {
 242       mod = zink_shader_compile(screen, zs, prog->nir[stage], (struct zink_shader_key*)key);
 243    }
 244    if (!mod) {
 245       FREE(zm);
 246       return NULL;
 247    }
 248    zm->shader = mod;
 249    /* non-generated tcs won't use the shader key */
 250    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->tcs.is_generated;
 251    if (key && !is_nongenerated_tcs) {
 252       zm->key_size = key_size;
 253       uint16_t *data = (uint16_t*)zm->key;
 254       /* sanitize actual key bits */
 255       *data = (*key) & mask;
 256    }
 257    zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
 258    util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
 259    return zm;
 260 }
 261
 262 ALWAYS_INLINE static struct zink_shader_module *
 263 get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
 264                                     struct zink_shader *zs, struct zink_gfx_program *prog,
 265                                     gl_shader_stage stage,
 266                                     struct zink_gfx_pipeline_state *state)
 267 {
 268    /* non-generated tcs won't use the shader key */
 269    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->tcs.is_generated;
 270    uint16_t *key;
 271    unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
 272    if (zs == prog->last_vertex_stage) {
 273       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
 274    } else if (stage == MESA_SHADER_FRAGMENT) {
 275       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
 276    } else if (stage == MESA_SHADER_TESS_CTRL && zs->tcs.is_generated) {
 277       key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
 278    } else {
 279       key = NULL;
 280    }
 281    struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
 282    unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 283    struct zink_shader_module **pzm = shader_cache->data;
 284    for (unsigned i = 0; i < count; i++) {
 285       struct zink_shader_module *iter = pzm[i];
 286       if (is_nongenerated_tcs) {
 287          /* always match */
 288       } else if (key) {
 289          uint16_t val = (*key) & mask;
 290          /* no key is bigger than uint16_t */
 291          if (memcmp(iter->key, &val, sizeof(uint16_t)))
 292             continue;
 293       }
 294       if (i > 0) {
 295          struct zink_shader_module *zero = pzm[0];
 296          pzm[0] = iter;
 297          pzm[i] = zero;
 298       }
 299       return iter;
 300    }
 301
 302    return NULL;
 303 }
 304
 305 static void
 306 zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
 307 {
 308    VKSCR(DestroyShaderModule)(screen->dev, zm->shader, NULL);
 309    free(zm);
 310 }
 311
 312 static void
 313 destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
 314 {
 315    while (util_dynarray_contains(sc, void*)) {
 316       struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
 317       zink_destroy_shader_module(screen, zm);
 318    }
 319 }
 320
 321 ALWAYS_INLINE static void
 322 update_gfx_shader_modules(struct zink_context *ctx,
 323                       struct zink_screen *screen,
 324                       struct zink_gfx_program *prog, uint32_t mask,
 325                       struct zink_gfx_pipeline_state *state,
 326                       bool has_inline, //is inlining enabled?
 327                       bool has_nonseamless) //is nonseamless ext present?
 328 {
 329    bool hash_changed = false;
 330    bool default_variants = true;
 331    assert(prog->modules[MESA_SHADER_VERTEX]);
 332    uint32_t variant_hash = prog->last_variant_hash;
 333    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 334       if (!(mask & BITFIELD_BIT(i)))
 335          continue;
 336
 337       assert(prog->shaders[i]);
 338
 339       unsigned inline_size = 0, nonseamless_size = 0;
 340       gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
 341       struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 342                                                                   inline_size, nonseamless_size, has_inline, has_nonseamless);
 343       if (!zm)
 344          zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 345                                              inline_size, nonseamless_size, has_inline, has_nonseamless);
 346       state->modules[i] = zm->shader;
 347       if (prog->modules[i] == zm->shader)
 348          continue;
 349       variant_hash ^= prog->module_hash[i];
 350       hash_changed = true;
 351       default_variants &= zm->default_variant;
 352       prog->modules[i] = zm->shader;
 353       prog->module_hash[i] = zm->hash;
 354       if (has_inline) {
 355          if (zm->num_uniforms)
 356             prog->inline_variants |= BITFIELD_BIT(i);
 357          else
 358             prog->inline_variants &= ~BITFIELD_BIT(i);
 359       }
 360       variant_hash ^= prog->module_hash[i];
 361    }
 362
 363    if (hash_changed && state) {
 364       if (default_variants)
 365          prog->last_variant_hash = prog->default_variant_hash;
 366       else
 367          prog->last_variant_hash = variant_hash;
 368
 369       state->modules_changed = true;
 370    }
 371 }
 372
 373 ALWAYS_INLINE static void
 374 update_gfx_shader_modules_optimal(struct zink_context *ctx,
 375                                   struct zink_screen *screen,
 376                                   struct zink_gfx_program *prog, uint32_t mask,
 377                                   struct zink_gfx_pipeline_state *state)
 378 {
 379    assert(prog->modules[MESA_SHADER_VERTEX]);
 380    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 381       if (!(mask & BITFIELD_BIT(i)))
 382          continue;
 383
 384       assert(prog->shaders[i]);
 385
 386       struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
 387       if (!zm)
 388          zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
 389       if (prog->modules[i] == zm->shader)
 390          continue;
 391       state->modules_changed = true;
 392       prog->modules[i] = zm->shader;
 393    }
 394
 395    prog->last_variant_hash = state->shader_keys_optimal.key.val;
 396 }
 397
 398 static void
 399 generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
 400 {
 401    assert(!prog->modules[MESA_SHADER_VERTEX]);
 402    uint32_t variant_hash = 0;
 403    bool default_variants = true;
 404    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 405       if (!(prog->stages_present & BITFIELD_BIT(i)))
 406          continue;
 407
 408       assert(prog->shaders[i]);
 409
 410       unsigned inline_size = 0, nonseamless_size = 0;
 411       gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
 412                                 screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
 413                                 &inline_size, &nonseamless_size);
 414       struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
 415                                                                      inline_size, nonseamless_size,
 416                                                                      screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
 417       state->modules[i] = zm->shader;
 418       prog->modules[i] = zm->shader;
 419       prog->module_hash[i] = zm->hash;
 420       if (zm->num_uniforms)
 421          prog->inline_variants |= BITFIELD_BIT(i);
 422       default_variants &= zm->default_variant;
 423       variant_hash ^= prog->module_hash[i];
 424    }
 425
 426    p_atomic_dec(&prog->base.reference.count);
 427    state->modules_changed = true;
 428
 429    prog->last_variant_hash = variant_hash;
 430    if (default_variants)
 431       prog->default_variant_hash = prog->last_variant_hash;
 432 }
 433
 434 static void
 435 generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
 436 {
 437    assert(!prog->modules[MESA_SHADER_VERTEX]);
 438    for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 439       if (!(prog->stages_present & BITFIELD_BIT(i)))
 440          continue;
 441
 442       assert(prog->shaders[i]);
 443
 444       struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
 445       prog->modules[i] = zm->shader;
 446    }
 447
 448    p_atomic_dec(&prog->base.reference.count);
 449    state->modules_changed = true;
 450    prog->last_variant_hash = state->shader_keys_optimal.key.val;
 451 }
 452
 453 static uint32_t
 454 hash_pipeline_lib_generated_tcs(const void *key)
 455 {
 456    return 1;
 457 }
 458
 459
 460 static bool
 461 equals_pipeline_lib_generated_tcs(const void *a, const void *b)
 462 {
 463    return !memcmp(a, b, sizeof(uint32_t));
 464 }
 465
 466 static uint32_t
 467 hash_pipeline_lib(const void *key)
 468 {
 469    const struct zink_gfx_library_key *gkey = key;
 470    /* remove generated tcs bits */
 471    return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
 472 }
 473
 474 static bool
 475 equals_pipeline_lib(const void *a, const void *b)
 476 {
 477    const struct zink_gfx_library_key *ak = a;
 478    const struct zink_gfx_library_key *bk = b;
 479    /* remove generated tcs bits */
 480    uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
 481    uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
 482    return val_a == val_b;
 483 }
 484
 485 uint32_t
 486 hash_gfx_input_dynamic(const void *key)
 487 {
 488    const struct zink_gfx_input_key *ikey = key;
 489    return ikey->idx;
 490 }
 491
 492 static bool
 493 equals_gfx_input_dynamic(const void *a, const void *b)
 494 {
 495    const struct zink_gfx_input_key *ikey_a = a;
 496    const struct zink_gfx_input_key *ikey_b = b;
 497    return ikey_a->idx == ikey_b->idx;
 498 }
 499
 500 uint32_t
 501 hash_gfx_input(const void *key)
 502 {
 503    const struct zink_gfx_input_key *ikey = key;
 504    if (ikey->uses_dynamic_stride)
 505       return ikey->input;
 506    return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
 507 }
 508
 509 static bool
 510 equals_gfx_input(const void *a, const void *b)
 511 {
 512    const struct zink_gfx_input_key *ikey_a = a;
 513    const struct zink_gfx_input_key *ikey_b = b;
 514    if (ikey_a->uses_dynamic_stride)
 515       return ikey_a->element_state == ikey_b->element_state &&
 516              !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
 517    return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
 518 }
 519
 520 uint32_t
 521 hash_gfx_output_ds3(const void *key)
 522 {
 523    const uint8_t *data = key;
 524    return _mesa_hash_data(data, sizeof(uint32_t));
 525 }
 526
 527 static bool
 528 equals_gfx_output_ds3(const void *a, const void *b)
 529 {
 530    const uint8_t *da = a;
 531    const uint8_t *db = b;
 532    return !memcmp(da, db, sizeof(uint32_t));
 533 }
 534
 535 uint32_t
 536 hash_gfx_output(const void *key)
 537 {
 538    const uint8_t *data = key;
 539    return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
 540 }
 541
 542 static bool
 543 equals_gfx_output(const void *a, const void *b)
 544 {
 545    const uint8_t *da = a;
 546    const uint8_t *db = b;
 547    return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
 548 }
 549
 550 ALWAYS_INLINE static void
 551 update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
 552 {
 553    struct zink_screen *screen = zink_screen(ctx->base.screen);
 554    if (screen->driconf.inline_uniforms)
 555       update_gfx_shader_modules(ctx, screen, prog,
 556                                 ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
 557                                 true, has_nonseamless);
 558    else
 559       update_gfx_shader_modules(ctx, screen, prog,
 560                                 ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
 561                                 false, has_nonseamless);
 562 }
 563
 564 static void
 565 update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
 566 {
 567    struct zink_screen *screen = zink_screen(ctx->base.screen);
 568    if (screen->info.have_EXT_non_seamless_cube_map)
 569       update_gfx_program_nonseamless(ctx, prog, true);
 570    else
 571       update_gfx_program_nonseamless(ctx, prog, false);
 572 }
 573
 574 void
 575 zink_gfx_program_update(struct zink_context *ctx)
 576 {
 577    if (ctx->last_vertex_stage_dirty) {
 578       gl_shader_stage pstage = ctx->last_vertex_stage->nir->info.stage;
 579       ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
 580       memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
 581              &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
 582              sizeof(struct zink_vs_key_base));
 583       ctx->last_vertex_stage_dirty = false;
 584    }
 585    if (ctx->gfx_dirty) {
 586       struct zink_gfx_program *prog = NULL;
 587
 588       simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 589       struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 590       const uint32_t hash = ctx->gfx_hash;
 591       struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 592       if (entry) {
 593          prog = (struct zink_gfx_program*)entry->data;
 594          for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
 595             if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
 596                ctx->gfx_pipeline_state.modules[i] = prog->modules[i];
 597          }
 598          /* ensure variants are always updated if keys have changed since last use */
 599          ctx->dirty_gfx_stages |= prog->stages_present;
 600          update_gfx_program(ctx, prog);
 601       } else {
 602          ctx->dirty_gfx_stages |= ctx->shader_stages;
 603          prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
 604          zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
 605          _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
 606          generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
 607       }
 608       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 609       if (prog && prog != ctx->curr_program)
 610          zink_batch_reference_program(&ctx->batch, &prog->base);
 611       if (ctx->curr_program)
 612          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 613       ctx->curr_program = prog;
 614       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 615       ctx->gfx_dirty = false;
 616    } else if (ctx->dirty_gfx_stages) {
 617       /* remove old hash */
 618       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 619       update_gfx_program(ctx, ctx->curr_program);
 620       /* apply new hash */
 621       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 622    }
 623    ctx->dirty_gfx_stages = 0;
 624 }
 625
 626 ALWAYS_INLINE static void
 627 update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
 628 {
 629    struct zink_screen *screen = zink_screen(ctx->base.screen);
 630    if (screen->info.have_EXT_graphics_pipeline_library)
 631       util_queue_fence_wait(&prog->base.cache_fence);
 632    struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
 633    if (!zm)
 634       zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
 635    prog->modules[pstage] = zm->shader;
 636 }
 637
 638 static void
 639 update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
 640 {
 641    const union zink_shader_key_optimal *optimal_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
 642    if (ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_bits != optimal_key->vs_bits) {
 643       update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage);
 644       ctx->gfx_pipeline_state.modules_changed = true;
 645    }
 646    if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits) {
 647       update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
 648       ctx->gfx_pipeline_state.modules_changed = true;
 649    }
 650    if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->tcs.is_generated &&
 651        ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {
 652       update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
 653       ctx->gfx_pipeline_state.modules_changed = true;
 654    }
 655    prog->last_variant_hash = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 656 }
 657
 658 void
 659 zink_gfx_program_update_optimal(struct zink_context *ctx)
 660 {
 661    if (ctx->gfx_dirty) {
 662       struct zink_gfx_program *prog = NULL;
 663       ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 664       struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
 665       const uint32_t hash = ctx->gfx_hash;
 666       simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 667       struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
 668
 669       if (ctx->curr_program)
 670          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 671       if (entry) {
 672          prog = (struct zink_gfx_program*)entry->data;
 673          update_gfx_program_optimal(ctx, prog);
 674       } else {
 675          ctx->dirty_gfx_stages |= ctx->shader_stages;
 676          prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
 677          zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
 678          _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
 679          generate_gfx_program_modules_optimal(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
 680       }
 681       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
 682       if (prog && prog != ctx->curr_program)
 683          zink_batch_reference_program(&ctx->batch, &prog->base);
 684       ctx->curr_program = prog;
 685       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 686    } else if (ctx->dirty_gfx_stages) {
 687       /* remove old hash */
 688       ctx->gfx_pipeline_state.optimal_key = ctx->gfx_pipeline_state.shader_keys_optimal.key.val;
 689       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 690       update_gfx_program_optimal(ctx, ctx->curr_program);
 691       /* apply new hash */
 692       ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
 693    }
 694    ctx->dirty_gfx_stages = 0;
 695    ctx->gfx_dirty = false;
 696    ctx->last_vertex_stage_dirty = false;
 697 }
 698
 699 static void
 700 optimized_compile_job(void *data, void *gdata, int thread_index)
 701 {
 702    struct zink_gfx_pipeline_cache_entry *pc_entry = data;
 703    struct zink_screen *screen = gdata;
 704    VkPipeline pipeline;
 705    if (pc_entry->gkey)
 706       pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->ikey->pipeline, pc_entry->gkey->pipeline, pc_entry->okey->pipeline, true);
 707    else
 708       pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
 709    if (pipeline) {
 710       pc_entry->unoptimized_pipeline = pc_entry->pipeline;
 711       pc_entry->pipeline = pipeline;
 712    }
 713 }
 714
 715 void
 716 zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
 717 {
 718    util_queue_add_job(&zink_screen(ctx->base.screen)->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
 719 }
 720
 721 static void
 722 update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
 723 {
 724    struct zink_screen *screen = zink_screen(ctx->base.screen);
 725    struct zink_shader *zs = comp->shader;
 726    VkShaderModule mod;
 727    struct zink_shader_module *zm = NULL;
 728    unsigned inline_size = 0, nonseamless_size = 0;
 729    struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
 730
 731    if (ctx && zs->nir->info.num_inlinable_uniforms &&
 732        ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
 733       if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
 734          inline_size = zs->nir->info.num_inlinable_uniforms;
 735       else
 736          key->inline_uniforms = false;
 737    }
 738    if (key->base.nonseamless_cube_mask)
 739       nonseamless_size = sizeof(uint32_t);
 740
 741    if (inline_size || nonseamless_size) {
 742       struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
 743       unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
 744       struct zink_shader_module **pzm = shader_cache->data;
 745       for (unsigned i = 0; i < count; i++) {
 746          struct zink_shader_module *iter = pzm[i];
 747          if (!shader_key_matches(iter, key, inline_size,
 748                                  screen->driconf.inline_uniforms,
 749                                  screen->info.have_EXT_non_seamless_cube_map))
 750             continue;
 751          if (i > 0) {
 752             struct zink_shader_module *zero = pzm[0];
 753             pzm[0] = iter;
 754             pzm[i] = zero;
 755          }
 756          zm = iter;
 757       }
 758    } else {
 759       zm = comp->module;
 760    }
 761
 762    if (!zm) {
 763       zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t));
 764       if (!zm) {
 765          return;
 766       }
 767       mod = zink_shader_compile(screen, zs, comp->shader->nir, key);
 768       if (!mod) {
 769          FREE(zm);
 770          return;
 771       }
 772       zm->shader = mod;
 773       zm->num_uniforms = inline_size;
 774       zm->key_size = 0;
 775       zm->has_nonseamless = !!nonseamless_size;
 776       assert(nonseamless_size || inline_size);
 777       if (nonseamless_size)
 778          memcpy(zm->key, &key->base.nonseamless_cube_mask, nonseamless_size);
 779       if (inline_size)
 780          memcpy(zm->key + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
 781       zm->hash = shader_module_hash(zm);
 782       zm->default_variant = false;
 783       if (inline_size)
 784          comp->inlined_variant_count++;
 785
 786       /* this is otherwise the default variant, which is stored as comp->module */
 787       if (zm->num_uniforms || nonseamless_size)
 788          util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
 789    }
 790    if (comp->curr == zm)
 791       return;
 792    ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
 793    comp->curr = zm;
 794    ctx->compute_pipeline_state.module_hash = zm->hash;
 795    ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
 796    ctx->compute_pipeline_state.module_changed = true;
 797 }
 798
 799 void
 800 zink_update_compute_program(struct zink_context *ctx)
 801 {
 802    util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
 803    update_cs_shader_module(ctx, ctx->curr_compute);
 804 }
 805
 806 VkPipelineLayout
 807 zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute)
 808 {
 809    VkPipelineLayoutCreateInfo plci = {0};
 810    plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
 811
 812    plci.pSetLayouts = dsl;
 813    plci.setLayoutCount = num_dsl;
 814
 815    VkPushConstantRange pcr;
 816    if (!is_compute) {
 817       pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
 818       pcr.offset = 0;
 819       pcr.size = sizeof(struct zink_gfx_push_constant);
 820       plci.pushConstantRangeCount = 1;
 821       plci.pPushConstantRanges = &pcr;
 822    }
 823
 824    VkPipelineLayout layout;
 825    VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
 826    if (result != VK_SUCCESS) {
 827       mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
 828       return VK_NULL_HANDLE;
 829    }
 830
 831    return layout;
 832 }
 833
 834 static void *
 835 create_program(struct zink_context *ctx, bool is_compute)
 836 {
 837    struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
 838    if (!pg)
 839       return NULL;
 840
 841    pipe_reference_init(&pg->reference, 1);
 842    util_queue_fence_init(&pg->cache_fence);
 843    pg->is_compute = is_compute;
 844    pg->ctx = ctx;
 845    return (void*)pg;
 846 }
 847
 848 static void
 849 assign_io(struct zink_screen *screen,
 850           struct zink_gfx_program *prog,
 851           struct zink_shader *stages[ZINK_GFX_SHADER_COUNT])
 852 {
 853    struct zink_shader *shaders[MESA_SHADER_STAGES];
 854
 855    /* build array in pipeline order */
 856    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
 857       shaders[i] = stages[i];
 858
 859    for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
 860       nir_shader *producer = shaders[i]->nir;
 861       for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
 862          struct zink_shader *consumer = shaders[j];
 863          if (!consumer)
 864             continue;
 865          if (!prog->nir[producer->info.stage])
 866             prog->nir[producer->info.stage] = nir_shader_clone(prog, producer);
 867          if (!prog->nir[j])
 868             prog->nir[j] = nir_shader_clone(prog, consumer->nir);
 869          zink_compiler_assign_io(screen, prog->nir[producer->info.stage], prog->nir[j]);
 870          i = j;
 871          break;
 872       }
 873    }
 874 }
 875
 876 struct zink_gfx_program *
 877 zink_create_gfx_program(struct zink_context *ctx,
 878                         struct zink_shader **stages,
 879                         unsigned vertices_per_patch)
 880 {
 881    struct zink_screen *screen = zink_screen(ctx->base.screen);
 882    struct zink_gfx_program *prog = create_program(ctx, false);
 883    if (!prog)
 884       goto fail;
 885
 886    prog->ctx = ctx;
 887
 888    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
 889       util_dynarray_init(&prog->shader_cache[i][0][0], NULL);
 890       util_dynarray_init(&prog->shader_cache[i][0][1], NULL);
 891       util_dynarray_init(&prog->shader_cache[i][1][0], NULL);
 892       util_dynarray_init(&prog->shader_cache[i][1][1], NULL);
 893       if (stages[i]) {
 894          prog->shaders[i] = stages[i];
 895          prog->stages_present |= BITFIELD_BIT(i);
 896       }
 897    }
 898    bool generated_tcs = false;
 899    if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
 900       prog->shaders[MESA_SHADER_TESS_EVAL]->tes.generated =
 901       prog->shaders[MESA_SHADER_TESS_CTRL] =
 902         zink_shader_tcs_create(screen, stages[MESA_SHADER_VERTEX], vertices_per_patch);
 903       prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
 904       generated_tcs = true;
 905    }
 906    prog->stages_remaining = prog->stages_present;
 907
 908    assign_io(screen, prog, prog->shaders);
 909
 910    if (stages[MESA_SHADER_GEOMETRY])
 911       prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
 912    else if (stages[MESA_SHADER_TESS_EVAL])
 913       prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
 914    else
 915       prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
 916
 917    for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
 918       for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
 919          _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
 920          /* only need first 3/4 for point/line/tri/patch */
 921          if (screen->info.have_EXT_extended_dynamic_state &&
 922              i == (prog->last_vertex_stage->nir->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
 923             break;
 924       }
 925    }
 926
 927    if (generated_tcs)
 928       _mesa_set_init(&prog->libs, prog, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
 929    else
 930       _mesa_set_init(&prog->libs, prog, hash_pipeline_lib, equals_pipeline_lib);
 931
 932    struct mesa_sha1 sctx;
 933    _mesa_sha1_init(&sctx);
 934    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
 935       if (prog->shaders[i]) {
 936          simple_mtx_lock(&prog->shaders[i]->lock);
 937          _mesa_set_add(prog->shaders[i]->programs, prog);
 938          simple_mtx_unlock(&prog->shaders[i]->lock);
 939          zink_gfx_program_reference(screen, NULL, prog);
 940          _mesa_sha1_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
 941       }
 942    }
 943    _mesa_sha1_final(&sctx, prog->base.sha1);
 944
 945    if (!zink_descriptor_program_init(ctx, &prog->base))
 946       goto fail;
 947
 948    return prog;
 949
 950 fail:
 951    if (prog)
 952       zink_destroy_gfx_program(screen, prog);
 953    return NULL;
 954 }
 955
 956 static uint32_t
 957 hash_compute_pipeline_state_local_size(const void *key)
 958 {
 959    const struct zink_compute_pipeline_state *state = key;
 960    uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
 961    hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
 962    return hash;
 963 }
 964
 965 static uint32_t
 966 hash_compute_pipeline_state(const void *key)
 967 {
 968    const struct zink_compute_pipeline_state *state = key;
 969    return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
 970 }
 971
 972 void
 973 zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const uint block[3])
 974 {
 975    if (comp->use_local_size) {
 976       for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
 977          if (ctx->compute_pipeline_state.local_size[i] != block[i])
 978             ctx->compute_pipeline_state.dirty = true;
 979          ctx->compute_pipeline_state.local_size[i] = block[i];
 980       }
 981    }
 982 }
 983
 984 static bool
 985 equals_compute_pipeline_state(const void *a, const void *b)
 986 {
 987    const struct zink_compute_pipeline_state *sa = a;
 988    const struct zink_compute_pipeline_state *sb = b;
 989    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
 990           sa->module == sb->module;
 991 }
 992
 993 static bool
 994 equals_compute_pipeline_state_local_size(const void *a, const void *b)
 995 {
 996    const struct zink_compute_pipeline_state *sa = a;
 997    const struct zink_compute_pipeline_state *sb = b;
 998    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
 999           !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
1000           sa->module == sb->module;
1001 }
1002
1003 static void
1004 precompile_compute_job(void *data, void *gdata, int thread_index)
1005 {
1006    struct zink_compute_program *comp = data;
1007    struct zink_screen *screen = gdata;
1008
1009    comp->shader = zink_shader_create(screen, comp->nir, NULL);
1010    comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
1011    assert(comp->module);
1012    comp->module->shader = zink_shader_compile(screen, comp->shader, comp->shader->nir, NULL);
1013    assert(comp->module->shader);
1014    util_dynarray_init(&comp->shader_cache[0], NULL);
1015    util_dynarray_init(&comp->shader_cache[1], NULL);
1016
1017    struct blob blob = {0};
1018    blob_init(&blob);
1019    nir_serialize(&blob, comp->shader->nir, true);
1020
1021    struct mesa_sha1 sha1_ctx;
1022    _mesa_sha1_init(&sha1_ctx);
1023    _mesa_sha1_update(&sha1_ctx, blob.data, blob.size);
1024    _mesa_sha1_final(&sha1_ctx, comp->base.sha1);
1025    blob_finish(&blob);
1026
1027    zink_descriptor_program_init(comp->base.ctx, &comp->base);
1028
1029    zink_screen_get_pipeline_cache(screen, &comp->base, true);
1030    if (comp->base.can_precompile)
1031       comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
1032    if (comp->base_pipeline)
1033       zink_screen_update_pipeline_cache(screen, &comp->base, true);
1034 }
1035
1036 static struct zink_compute_program *
1037 create_compute_program(struct zink_context *ctx, nir_shader *nir)
1038 {
1039    struct zink_screen *screen = zink_screen(ctx->base.screen);
1040    struct zink_compute_program *comp = create_program(ctx, true);
1041    if (!comp)
1042       return NULL;
1043    comp->nir = nir;
1044
1045    comp->use_local_size = !(nir->info.workgroup_size[0] ||
1046                             nir->info.workgroup_size[1] ||
1047                             nir->info.workgroup_size[2]);
1048    comp->base.can_precompile = !comp->use_local_size && (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir));
1049    _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
1050                                                        equals_compute_pipeline_state_local_size :
1051                                                        equals_compute_pipeline_state);
1052    util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
1053                       precompile_compute_job, NULL, 0);
1054    return comp;
1055 }
1056
1057 uint32_t
1058 zink_program_get_descriptor_usage(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type)
1059 {
1060    struct zink_shader *zs = NULL;
1061    switch (stage) {
1062    case MESA_SHADER_VERTEX:
1063    case MESA_SHADER_TESS_CTRL:
1064    case MESA_SHADER_TESS_EVAL:
1065    case MESA_SHADER_GEOMETRY:
1066    case MESA_SHADER_FRAGMENT:
1067       zs = ctx->gfx_stages[stage];
1068       break;
1069    case MESA_SHADER_COMPUTE: {
1070       zs = ctx->curr_compute->shader;
1071       break;
1072    }
1073    default:
1074       unreachable("unknown shader type");
1075    }
1076    if (!zs)
1077       return 0;
1078    switch (type) {
1079    case ZINK_DESCRIPTOR_TYPE_UBO:
1080       return zs->ubos_used;
1081    case ZINK_DESCRIPTOR_TYPE_SSBO:
1082       return zs->ssbos_used;
1083    case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
1084       return BITSET_TEST_RANGE(zs->nir->info.textures_used, 0, PIPE_MAX_SAMPLERS - 1);
1085    case ZINK_DESCRIPTOR_TYPE_IMAGE:
1086       return BITSET_TEST_RANGE(zs->nir->info.images_used, 0, PIPE_MAX_SAMPLERS - 1);
1087    default:
1088       unreachable("unknown descriptor type!");
1089    }
1090    return 0;
1091 }
1092
1093 bool
1094 zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
1095 {
1096    struct zink_shader *zs = NULL;
1097    switch (stage) {
1098    case MESA_SHADER_VERTEX:
1099    case MESA_SHADER_TESS_CTRL:
1100    case MESA_SHADER_TESS_EVAL:
1101    case MESA_SHADER_GEOMETRY:
1102    case MESA_SHADER_FRAGMENT:
1103       zs = ctx->gfx_stages[stage];
1104       break;
1105    case MESA_SHADER_COMPUTE: {
1106       zs = ctx->curr_compute->shader;
1107       break;
1108    }
1109    default:
1110       unreachable("unknown shader type");
1111    }
1112    if (!zs)
1113       return false;
1114    return zink_shader_descriptor_is_buffer(zs, type, i);
1115 }
1116
1117 static unsigned
1118 get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
1119 {
1120    switch (type) {
1121    case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
1122       return !!zs->has_uniforms;
1123    case ZINK_DESCRIPTOR_TYPE_UBO:
1124    case ZINK_DESCRIPTOR_TYPE_SSBO:
1125       return zs->num_bindings[type];
1126    default:
1127       break;
1128    }
1129    unsigned num_bindings = 0;
1130    for (int i = 0; i < zs->num_bindings[type]; i++)
1131       num_bindings += zs->bindings[type][i].size;
1132    return num_bindings;
1133 }
1134
1135 unsigned
1136 zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
1137 {
1138    unsigned num_bindings = 0;
1139    if (pg->is_compute) {
1140       struct zink_compute_program *comp = (void*)pg;
1141       return get_num_bindings(comp->shader, type);
1142    }
1143    struct zink_gfx_program *prog = (void*)pg;
1144    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1145       if (prog->shaders[i])
1146          num_bindings += get_num_bindings(prog->shaders[i], type);
1147    }
1148    return num_bindings;
1149 }
1150
1151 unsigned
1152 zink_program_num_bindings(const struct zink_program *pg)
1153 {
1154    unsigned num_bindings = 0;
1155    for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
1156       num_bindings += zink_program_num_bindings_typed(pg, i);
1157    return num_bindings;
1158 }
1159
1160 static void
1161 deinit_program(struct zink_screen *screen, struct zink_program *pg)
1162 {
1163    util_queue_fence_wait(&pg->cache_fence);
1164    if (pg->layout)
1165       VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);
1166
1167    if (pg->pipeline_cache)
1168       VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
1169    zink_descriptor_program_deinit(screen, pg);
1170 }
1171
1172 void
1173 zink_destroy_gfx_program(struct zink_screen *screen,
1174                          struct zink_gfx_program *prog)
1175 {
1176    unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
1177    if (screen->info.have_EXT_extended_dynamic_state) {
1178       /* only need first 3/4 for point/line/tri/patch */
1179       if ((prog->stages_present &
1180           (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
1181           BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
1182          max_idx = 4;
1183       else
1184          max_idx = 3;
1185       max_idx++;
1186    }
1187
1188    for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
1189       for (int i = 0; i < max_idx; ++i) {
1190          hash_table_foreach(&prog->pipelines[r][i], entry) {
1191             struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
1192
1193             util_queue_fence_wait(&pc_entry->fence);
1194             VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1195             VKSCR(DestroyPipeline)(screen->dev, pc_entry->unoptimized_pipeline, NULL);
1196             free(pc_entry);
1197          }
1198       }
1199    }
1200
1201    deinit_program(screen, &prog->base);
1202
1203    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1204       if (prog->shaders[i]) {
1205          _mesa_set_remove_key(prog->shaders[i]->programs, prog);
1206          prog->shaders[i] = NULL;
1207       }
1208       destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
1209       destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
1210       destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
1211       destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
1212       ralloc_free(prog->nir[i]);
1213    }
1214
1215    set_foreach_remove(&prog->libs, he) {
1216       struct zink_gfx_library_key *gkey = (void*)he->key;
1217       VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
1218    }
1219
1220    ralloc_free(prog);
1221 }
1222
1223 void
1224 zink_destroy_compute_program(struct zink_screen *screen,
1225                              struct zink_compute_program *comp)
1226 {
1227    deinit_program(screen, &comp->base);
1228
1229    if (comp->shader)
1230       _mesa_set_remove_key(comp->shader->programs, comp);
1231    destroy_shader_cache(screen, &comp->shader_cache[0]);
1232    destroy_shader_cache(screen, &comp->shader_cache[1]);
1233
1234    hash_table_foreach(&comp->pipelines, entry) {
1235       struct compute_pipeline_cache_entry *pc_entry = entry->data;
1236
1237       VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1238       free(pc_entry);
1239    }
1240    VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
1241    VKSCR(DestroyShaderModule)(screen->dev, comp->module->shader, NULL);
1242    free(comp->module);
1243
1244    ralloc_free(comp);
1245 }
1246
1247 ALWAYS_INLINE static bool
1248 compute_can_shortcut(const struct zink_compute_program *comp)
1249 {
1250    return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
1251 }
1252
1253 VkPipeline
1254 zink_get_compute_pipeline(struct zink_screen *screen,
1255                       struct zink_compute_program *comp,
1256                       struct zink_compute_pipeline_state *state)
1257 {
1258    struct hash_entry *entry = NULL;
1259
1260    if (!state->dirty && !state->module_changed)
1261       return state->pipeline;
1262    if (state->dirty) {
1263       if (state->pipeline) //avoid on first hash
1264          state->final_hash ^= state->hash;
1265       if (comp->use_local_size)
1266          state->hash = hash_compute_pipeline_state_local_size(state);
1267       else
1268          state->hash = hash_compute_pipeline_state(state);
1269       state->dirty = false;
1270       state->final_hash ^= state->hash;
1271    }
1272
1273    util_queue_fence_wait(&comp->base.cache_fence);
1274    if (comp->base_pipeline && compute_can_shortcut(comp)) {
1275       state->pipeline = comp->base_pipeline;
1276       return state->pipeline;
1277    }
1278    entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
1279
1280    if (!entry) {
1281       VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);
1282
1283       if (pipeline == VK_NULL_HANDLE)
1284          return VK_NULL_HANDLE;
1285
1286       zink_screen_update_pipeline_cache(screen, &comp->base, false);
1287       if (compute_can_shortcut(comp)) {
1288          /* don't add base pipeline to cache */
1289          state->pipeline = comp->base_pipeline = pipeline;
1290          return state->pipeline;
1291       }
1292
1293       struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
1294       if (!pc_entry)
1295          return VK_NULL_HANDLE;
1296
1297       memcpy(&pc_entry->state, state, sizeof(*state));
1298       pc_entry->pipeline = pipeline;
1299
1300       entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
1301       assert(entry);
1302    }
1303
1304    struct compute_pipeline_cache_entry *cache_entry = entry->data;
1305    state->pipeline = cache_entry->pipeline;
1306    return state->pipeline;
1307 }
1308
1309 ALWAYS_INLINE static void
1310 bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
1311 {
1312    if (shader && shader->nir->info.num_inlinable_uniforms)
1313       ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
1314    else
1315       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);
1316
1317    if (ctx->gfx_stages[stage])
1318       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1319    ctx->gfx_stages[stage] = shader;
1320    ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
1321    ctx->gfx_pipeline_state.modules_changed = true;
1322    if (shader) {
1323       ctx->shader_stages |= BITFIELD_BIT(stage);
1324       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1325    } else {
1326       ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
1327       if (ctx->curr_program)
1328          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
1329       ctx->curr_program = NULL;
1330       ctx->shader_stages &= ~BITFIELD_BIT(stage);
1331    }
1332 }
1333
1334 static enum pipe_prim_type
1335 gs_output_to_reduced_prim_type(struct shader_info *info)
1336 {
1337    switch (info->gs.output_primitive) {
1338    case SHADER_PRIM_POINTS:
1339       return PIPE_PRIM_POINTS;
1340
1341    case SHADER_PRIM_LINES:
1342    case SHADER_PRIM_LINE_LOOP:
1343    case SHADER_PRIM_LINE_STRIP:
1344    case SHADER_PRIM_LINES_ADJACENCY:
1345    case SHADER_PRIM_LINE_STRIP_ADJACENCY:
1346       return PIPE_PRIM_LINES;
1347
1348    case SHADER_PRIM_TRIANGLES:
1349    case SHADER_PRIM_TRIANGLE_STRIP:
1350    case SHADER_PRIM_TRIANGLE_FAN:
1351    case SHADER_PRIM_TRIANGLES_ADJACENCY:
1352    case SHADER_PRIM_TRIANGLE_STRIP_ADJACENCY:
1353       return PIPE_PRIM_TRIANGLES;
1354
1355    default:
1356       unreachable("unexpected output primitive type");
1357    }
1358 }
1359
1360 static enum pipe_prim_type
1361 update_rast_prim(struct zink_shader *shader)
1362 {
1363    struct shader_info *info = &shader->nir->info;
1364    if (info->stage == MESA_SHADER_GEOMETRY)
1365       return gs_output_to_reduced_prim_type(info);
1366    else if (info->stage == MESA_SHADER_TESS_EVAL) {
1367       if (info->tess.point_mode)
1368          return PIPE_PRIM_POINTS;
1369       else {
1370          switch (info->tess._primitive_mode) {
1371          case TESS_PRIMITIVE_ISOLINES:
1372             return PIPE_PRIM_LINES;
1373          case TESS_PRIMITIVE_TRIANGLES:
1374          case TESS_PRIMITIVE_QUADS:
1375             return PIPE_PRIM_TRIANGLES;
1376          default:
1377             return PIPE_PRIM_MAX;
1378          }
1379       }
1380    }
1381    return PIPE_PRIM_MAX;
1382 }
1383
1384 static void
1385 bind_last_vertex_stage(struct zink_context *ctx)
1386 {
1387    gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->nir->info.stage : MESA_SHADER_STAGES;
1388    if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1389       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
1390    else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1391       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
1392    else
1393       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
1394    gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->nir->info.stage : MESA_SHADER_VERTEX;
1395
1396    /* update rast_prim */
1397    ctx->gfx_pipeline_state.shader_rast_prim =
1398       ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
1399                                PIPE_PRIM_MAX;
1400
1401    if (old != current) {
1402       if (!zink_screen(ctx->base.screen)->optimal_keys) {
1403          if (old != MESA_SHADER_STAGES) {
1404             memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1405             ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
1406          } else {
1407             /* always unset vertex shader values when changing to a non-vs last stage */
1408             memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1409          }
1410       }
1411
1412       unsigned num_viewports = ctx->vp_state.num_viewports;
1413       struct zink_screen *screen = zink_screen(ctx->base.screen);
1414       /* number of enabled viewports is based on whether last vertex stage writes viewport index */
1415       if (ctx->last_vertex_stage) {
1416          if (ctx->last_vertex_stage->nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
1417             ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
1418          else
1419             ctx->vp_state.num_viewports = 1;
1420       } else {
1421          ctx->vp_state.num_viewports = 1;
1422       }
1423       ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
1424       if (!screen->info.have_EXT_extended_dynamic_state) {
1425          if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
1426             ctx->gfx_pipeline_state.dirty = true;
1427          ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
1428       }
1429       ctx->last_vertex_stage_dirty = true;
1430    }
1431 }
1432
1433 static void
1434 zink_bind_vs_state(struct pipe_context *pctx,
1435                    void *cso)
1436 {
1437    struct zink_context *ctx = zink_context(pctx);
1438    if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
1439       return;
1440    bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
1441    bind_last_vertex_stage(ctx);
1442    if (cso) {
1443       struct zink_shader *zs = cso;
1444       ctx->shader_reads_drawid = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1445       ctx->shader_reads_basevertex = BITSET_TEST(zs->nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
1446    } else {
1447       ctx->shader_reads_drawid = false;
1448       ctx->shader_reads_basevertex = false;
1449    }
1450 }
1451
1452 /* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
1453  * in GL, samples==1 means ignore gl_SampleMask[]
1454  * in VK, gl_SampleMask[] is never ignored
1455  */
1456 void
1457 zink_update_fs_key_samples(struct zink_context *ctx)
1458 {
1459    if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1460       return;
1461    nir_shader *nir = ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir;
1462    if (nir->info.outputs_written & (1 << FRAG_RESULT_SAMPLE_MASK)) {
1463       bool samples = zink_get_fs_key(ctx)->samples;
1464       if (samples != (ctx->fb_state.samples > 1))
1465          zink_set_fs_key(ctx)->samples = ctx->fb_state.samples > 1;
1466    }
1467 }
1468
1469 static void
1470 zink_bind_fs_state(struct pipe_context *pctx,
1471                    void *cso)
1472 {
1473    struct zink_context *ctx = zink_context(pctx);
1474    if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1475       return;
1476    bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
1477    ctx->fbfetch_outputs = 0;
1478    if (cso) {
1479       nir_shader *nir = ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir;
1480       if (nir->info.fs.uses_fbfetch_output) {
1481          nir_foreach_shader_out_variable(var, ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir) {
1482             if (var->data.fb_fetch_output)
1483                ctx->fbfetch_outputs |= BITFIELD_BIT(var->data.location - FRAG_RESULT_DATA0);
1484          }
1485       }
1486       zink_update_fs_key_samples(ctx);
1487       if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
1488          if (ctx->gfx_pipeline_state.rast_attachment_order != nir->info.fs.uses_fbfetch_output)
1489             ctx->gfx_pipeline_state.dirty = true;
1490          ctx->gfx_pipeline_state.rast_attachment_order = nir->info.fs.uses_fbfetch_output;
1491       }
1492    }
1493    zink_update_fbfetch(ctx);
1494 }
1495
1496 static void
1497 zink_bind_gs_state(struct pipe_context *pctx,
1498                    void *cso)
1499 {
1500    struct zink_context *ctx = zink_context(pctx);
1501    if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1502       return;
1503    bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
1504    bind_last_vertex_stage(ctx);
1505 }
1506
1507 static void
1508 zink_bind_tcs_state(struct pipe_context *pctx,
1509                    void *cso)
1510 {
1511    bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
1512 }
1513
1514 static void
1515 zink_bind_tes_state(struct pipe_context *pctx,
1516                    void *cso)
1517 {
1518    struct zink_context *ctx = zink_context(pctx);
1519    if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1520       return;
1521    if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
1522       if (!cso) {
1523          /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */
1524          if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->tes.generated)
1525             ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
1526       }
1527    }
1528    bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
1529    bind_last_vertex_stage(ctx);
1530 }
1531
1532 static void *
1533 zink_create_cs_state(struct pipe_context *pctx,
1534                      const struct pipe_compute_state *shader)
1535 {
1536    struct nir_shader *nir;
1537    if (shader->ir_type != PIPE_SHADER_IR_NIR)
1538       nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
1539    else
1540       nir = (struct nir_shader *)shader->prog;
1541
1542    if (nir->info.uses_bindless)
1543       zink_descriptors_init_bindless(zink_context(pctx));
1544
1545    return create_compute_program(zink_context(pctx), nir);
1546 }
1547
1548 static void
1549 zink_bind_cs_state(struct pipe_context *pctx,
1550                    void *cso)
1551 {
1552    struct zink_context *ctx = zink_context(pctx);
1553    struct zink_compute_program *comp = cso;
1554    if (comp && comp->nir->info.num_inlinable_uniforms)
1555       ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
1556    else
1557       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);
1558
1559    if (ctx->curr_compute) {
1560       zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
1561       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
1562       ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
1563       ctx->compute_pipeline_state.module_hash = 0;
1564    }
1565    ctx->compute_pipeline_state.dirty = true;
1566    ctx->curr_compute = comp;
1567    if (comp && comp != ctx->curr_compute) {
1568       ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
1569       if (util_queue_fence_is_signalled(&comp->base.cache_fence))
1570          ctx->compute_pipeline_state.module = ctx->curr_compute->curr->shader;
1571       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
1572       if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
1573          ctx->compute_dirty = true;
1574    }
1575    zink_select_launch_grid(ctx);
1576 }
1577
1578 static void
1579 zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
1580 {
1581    struct zink_compute_program *comp = cso;
1582    zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
1583 }
1584
1585 void
1586 zink_delete_shader_state(struct pipe_context *pctx, void *cso)
1587 {
1588    zink_shader_free(zink_screen(pctx->screen), cso);
1589 }
1590
1591 void *
1592 zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
1593 {
1594    nir_shader *nir;
1595    if (shader->type != PIPE_SHADER_IR_NIR)
1596       nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
1597    else
1598       nir = (struct nir_shader *)shader->ir.nir;
1599
1600    if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
1601       zink_descriptor_util_init_fbfetch(zink_context(pctx));
1602    if (nir->info.uses_bindless)
1603       zink_descriptors_init_bindless(zink_context(pctx));
1604
1605    return zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output);
1606 }
1607
1608 static void
1609 zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
1610 {
1611    struct zink_screen *screen = zink_screen(pctx->screen);
1612    util_shader_reference(pctx, &screen->shaders, &cso, NULL);
1613 }
1614
1615 static void *
1616 zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
1617 {
1618    bool cache_hit;
1619    struct zink_screen *screen = zink_screen(pctx->screen);
1620    return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
1621 }
1622
1623 struct zink_gfx_library_key *
1624 zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
1625 {
1626    struct zink_gfx_library_key *gkey = rzalloc(prog, struct zink_gfx_library_key);
1627    gkey->optimal_key = state->optimal_key;
1628    assert(gkey->optimal_key);
1629    memcpy(gkey->modules, prog->modules, sizeof(gkey->modules));
1630    gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
1631    _mesa_set_add(&prog->libs, gkey);
1632    return gkey;
1633 }
1634
1635 static const char *
1636 print_exe_stages(VkShaderStageFlags stages)
1637 {
1638    if (stages == VK_SHADER_STAGE_VERTEX_BIT)
1639       return "VS";
1640    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
1641       return "VS+GS";
1642    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
1643       return "VS+TCS+TES";
1644    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
1645       return "VS+TCS+TES+GS";
1646    if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1647       return "TCS";
1648    if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1649       return "TES";
1650    if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
1651       return "GS";
1652    if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
1653       return "FS";
1654    if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
1655       return "CS";
1656    unreachable("unhandled combination of stages!");
1657 }
1658
1659 static void
1660 print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline)
1661 {
1662    VkPipelineInfoKHR pinfo = {
1663      VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
1664      NULL,
1665      pipeline
1666    };
1667    unsigned exe_count = 0;
1668    VkPipelineExecutablePropertiesKHR props[10] = {0};
1669    for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
1670       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
1671       props[i].pNext = NULL;
1672    }
1673    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
1674    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
1675    printf("PIPELINE STATISTICS:");
1676    for (unsigned e = 0; e < exe_count; e++) {
1677       VkPipelineExecutableInfoKHR info = {
1678          VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
1679          NULL,
1680          pipeline,
1681          e
1682       };
1683       unsigned count = 0;
1684       printf("\n\t%s (%s): ", print_exe_stages(props[e].stages), props[e].name);
1685       VkPipelineExecutableStatisticKHR *stats = NULL;
1686       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
1687       stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
1688       for (unsigned i = 0; i < count; i++)
1689          stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
1690       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);
1691
1692       for (unsigned i = 0; i < count; i++) {
1693          if (i)
1694             printf(", ");
1695          switch (stats[i].format) {
1696          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
1697             printf("%s: %u", stats[i].name, stats[i].value.b32);
1698             break;
1699          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
1700             printf("%s: %" PRIi64, stats[i].name, stats[i].value.i64);
1701             break;
1702          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
1703             printf("%s: %" PRIu64, stats[i].name, stats[i].value.u64);
1704             break;
1705          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
1706             printf("%s: %g", stats[i].name, stats[i].value.f64);
1707             break;
1708          default:
1709             unreachable("unknown statistic");
1710          }
1711       }
1712    }
1713    printf("\n");
1714 }
1715
1716 static void
1717 precompile_job(void *data, void *gdata, int thread_index)
1718 {
1719    struct zink_screen *screen = gdata;
1720    struct zink_gfx_program *prog = data;
1721
1722    struct zink_gfx_pipeline_state state = {0};
1723    state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
1724    state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
1725    state.optimal_key = state.shader_keys_optimal.key.val;
1726    generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
1727    zink_screen_get_pipeline_cache(screen, &prog->base, true);
1728    zink_create_pipeline_lib(screen, prog, &state);
1729    zink_screen_update_pipeline_cache(screen, &prog->base, true);
1730 }
1731
1732 static void
1733 zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
1734 {
1735    struct zink_context *ctx = zink_context(pctx);
1736    struct zink_shader **zshaders = (struct zink_shader **)shaders;
1737    if (shaders[MESA_SHADER_COMPUTE])
1738       return;
1739    /* can't precompile fixedfunc */
1740    if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT])
1741       return;
1742    unsigned hash = 0;
1743    unsigned shader_stages = 0;
1744    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1745       if (zshaders[i]) {
1746          hash ^= zshaders[i]->hash;
1747          shader_stages |= BITFIELD_BIT(i);
1748       }
1749    }
1750    unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
1751    unsigned tess = shader_stages & tess_stages;
1752    /* can't do fixedfunc tes either */
1753    if (tess && !shaders[MESA_SHADER_TESS_EVAL])
1754       return;
1755    struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
1756    simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
1757    /* link can be called repeatedly with the same shaders: ignore */
1758    if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
1759       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
1760       return;
1761    }
1762    struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3);
1763    u_foreach_bit(i, shader_stages)
1764       assert(prog->shaders[i]);
1765    _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
1766    simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
1767    if (zink_debug & ZINK_DEBUG_SHADERDB) {
1768       struct zink_screen *screen = zink_screen(pctx->screen);
1769       if (screen->optimal_keys)
1770          generate_gfx_program_modules_optimal(ctx, screen,  prog, &ctx->gfx_pipeline_state);
1771       else
1772          generate_gfx_program_modules(ctx, screen,  prog, &ctx->gfx_pipeline_state);
1773       VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map,  shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
1774       print_pipeline_stats(screen, pipeline);
1775    } else {
1776       util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
1777    }
1778 }
1779
1780 void
1781 zink_program_init(struct zink_context *ctx)
1782 {
1783    ctx->base.create_vs_state = zink_create_cached_shader_state;
1784    ctx->base.bind_vs_state = zink_bind_vs_state;
1785    ctx->base.delete_vs_state = zink_delete_cached_shader_state;
1786
1787    ctx->base.create_fs_state = zink_create_cached_shader_state;
1788    ctx->base.bind_fs_state = zink_bind_fs_state;
1789    ctx->base.delete_fs_state = zink_delete_cached_shader_state;
1790
1791    ctx->base.create_gs_state = zink_create_cached_shader_state;
1792    ctx->base.bind_gs_state = zink_bind_gs_state;
1793    ctx->base.delete_gs_state = zink_delete_cached_shader_state;
1794
1795    ctx->base.create_tcs_state = zink_create_cached_shader_state;
1796    ctx->base.bind_tcs_state = zink_bind_tcs_state;
1797    ctx->base.delete_tcs_state = zink_delete_cached_shader_state;
1798
1799    ctx->base.create_tes_state = zink_create_cached_shader_state;
1800    ctx->base.bind_tes_state = zink_bind_tes_state;
1801    ctx->base.delete_tes_state = zink_delete_cached_shader_state;
1802
1803    ctx->base.create_compute_state = zink_create_cs_state;
1804    ctx->base.bind_compute_state = zink_bind_cs_state;
1805    ctx->base.delete_compute_state = zink_delete_cs_shader_state;
1806
1807    if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
1808       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
1809    else
1810       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
1811    if (zink_screen(ctx->base.screen)->have_full_ds3)
1812       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
1813    else
1814       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
1815    /* validate struct packing */
1816    STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
1817    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
1818                  offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
1819    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
1820                  offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
1821    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
1822                  offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));
1823
1824    STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
1825
1826    if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library || zink_debug & ZINK_DEBUG_SHADERDB)
1827       ctx->base.link_shader = zink_link_gfx_shader;
1828 }
1829
1830 bool
1831 zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
1832 {
1833    bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
1834    bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
1835    ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
1836    if (!changed)
1837       return false;
1838    if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
1839       ctx->gfx_pipeline_state.dirty |= true;
1840    ctx->rasterizer_discard_changed = true;
1841    return true;
1842 }
1843
1844 void
1845 zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
1846                            struct util_queue_fence *fence,
1847                            pipe_driver_thread_func execute,
1848                            pipe_driver_thread_func cleanup,
1849                            const size_t job_size)
1850 {
1851    struct zink_screen *screen = zink_screen(pscreen);
1852    util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
1853 }