zink: fix store substitution in `lower_pv_mode_gs_store`
[platform/upstream/mesa.git] src/gallium/drivers/zink/zink_compiler.c
/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "nir_xfb_info.h"
#include "nir/nir_draw_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "compiler/nir/nir_builtin_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"

bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);

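/* Recursively copy the value behind one deref to another, decomposing
 * structs and arrays/matrices member by member until plain vector
 * loads/stores can be emitted.
 */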
static void
copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
   if (glsl_type_is_struct(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
         copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
      }
   } else if (glsl_type_is_array_or_matrix(dst->type)) {
      unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
      for (unsigned i = 0; i < count; i++) {
         copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
      }
   } else {
      nir_ssa_def *load = nir_load_deref(b, src);
      nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
   }
}

#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)

static void
create_gfx_pushconst(nir_shader *nir)
{
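/* Each member is declared as a uint array whose size and offset mirror the
 * matching field of struct zink_gfx_push_constant, giving the ntv push
 * constant loader a compatible layout to address by byte offset.
 */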
#define PUSHCONST_MEMBER(member_idx, field)                                                                     \
fields[member_idx].type =                                                                                       \
   glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);

   nir_variable *pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);

   pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                   glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
                                   "gfx_pushconst");
   pushconst->data.location = INT_MAX; //doesn't really matter

#undef PUSHCONST_MEMBER
}

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
     casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
   return progress;
}

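/* GL requires gl_BaseVertex to read as zero for non-indexed draws, so the
 * loaded value is bcsel'd against 0 using the draw_mode_is_indexed push
 * constant.
 */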
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}

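/* gl_DrawID reads are replaced with a load from the draw_id slot of the gfx
 * push constants.
 */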
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

struct lower_gl_point_state {
   nir_variable *gl_pos_out;
   nir_variable *gl_point_size;
};

static bool
lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_gl_point_state *state = data;
   nir_ssa_def *vp_scale, *pos;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   /* this must be checked before filtering for emit_vertex, or the removal
    * below is unreachable dead code
    */
   if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
       intrin->intrinsic == nir_intrinsic_end_primitive) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   if (nir_intrinsic_stream_id(intrin) != 0)
      return false;

   b->cursor = nir_before_instr(instr);

   // viewport-map endpoints
   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);

   // Load point info values
   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);

   // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
   w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
   // half_w_delta = w_delta / 2
   nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));

   // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
   h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
   // half_h_delta = h_delta / 2
   nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));

   nir_ssa_def *point_dir[4][2] = {
      { nir_imm_float(b, -1), nir_imm_float(b, -1) },
      { nir_imm_float(b, -1), nir_imm_float(b, 1) },
      { nir_imm_float(b, 1), nir_imm_float(b, -1) },
      { nir_imm_float(b, 1), nir_imm_float(b, 1) }
   };

   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);

   for (size_t i = 0; i < 4; i++) {
      pos = nir_vec4(b,
                     nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
                     nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
                     nir_channel(b, point_pos, 2),
                     nir_channel(b, point_pos, 3));

      nir_store_var(b, state->gl_pos_out, pos, 0xf);

      nir_emit_vertex(b);
   }

   nir_end_primitive(b);

   nir_instr_remove(&intrin->instr);

   return true;
}

static bool
lower_gl_point_gs(nir_shader *shader)
{
   struct lower_gl_point_state state;
   nir_builder b;

   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   shader->info.gs.vertices_out *= 4;

   // Get the gl_Position and gl_PointSize outputs
   state.gl_pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);
   state.gl_point_size =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_PSIZ);

   // if gl_Position or gl_PointSize isn't written, we have nothing to do
   if (!state.gl_pos_out || !state.gl_point_size)
      return false;

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                       nir_metadata_dominance, &state);
}

struct lower_pv_mode_state {
   nir_variable *varyings[VARYING_SLOT_MAX];
   nir_variable *pos_counter;
   nir_variable *out_pos_counter;
   nir_variable *ring_offset;
   unsigned ring_size;
   unsigned primitive_vert_count;
   unsigned prim;
};

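/* Map a logical vertex index onto the varying ring buffers:
 * (index + ring_offset) % ring_size.
 */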
static nir_ssa_def*
lower_pv_mode_gs_ring_index(nir_builder *b,
                            struct lower_pv_mode_state *state,
                            nir_ssa_def *index)
{
   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
   return nir_imod(b, nir_iadd(b, index, ring_offset),
                      nir_imm_int(b, state->ring_size));
}

/* Given the final deref of a chain of derefs, this function will walk up the
 * chain until it finds a var deref.
 *
 * It will then recreate an identical chain that ends with the provided deref.
 */
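/* Example with hypothetical names: given an old chain var[i].field and
 * ring[idx] as the replacement deref, the rebuilt chain is
 * ring[idx][i].field.
 */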
static nir_deref_instr*
replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
{
   nir_deref_instr *parent = nir_src_as_deref(old->parent);
   switch(old->deref_type) {
   case nir_deref_type_var:
      return new;
   case nir_deref_type_array:
      assert(old->arr.index.is_ssa);
      return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
   case nir_deref_type_array_wildcard:
   case nir_deref_type_ptr_as_array:
   case nir_deref_type_cast:
      unreachable("unexpected deref type");
   }
   unreachable("impossible deref type");
}

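/* Redirect stores to shader_out variables into the matching ring buffer at
 * the slot for the current vertex; any array/struct tail of the original
 * deref chain is rebuilt on top of the ring slot via replicate_derefs().
 */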
static bool
lower_pv_mode_gs_store(nir_builder *b,
                       nir_intrinsic_instr *intrin,
                       struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      gl_varying_slot location = var->data.location;
      assert(state->varyings[location]);
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
      nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location]);
      nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
      // recreate the chain of derefs that led to the store
      nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
      nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
      nir_instr_remove(&intrin->instr);
      return true;
   }

   return false;
}

static void
lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                struct lower_pv_mode_state *state,
                                nir_ssa_def *current_vertex)
{
   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_ssa_def *three = nir_imm_int(b, 3);
   bool is_triangle = state->primitive_vert_count == 3;
   /* This shader will always see the last three vertices emitted by the user gs.
    * The following table is used to rotate primitives within a strip generated
    * by the user gs such that the last vertex becomes the first.
    *
    * [lines, tris][even/odd index][vertex mod 3]
    */
   static const unsigned vert_maps[2][2][3] = {
      {{1, 0, 0}, {1, 0, 0}},
      {{2, 0, 1}, {2, 1, 0}}
   };
   /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
    * is either the last or the second, depending on whether the triangle is at an odd
    * or even position within the strip.
    *
    * odd or even primitive within draw
    */
   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
   for (unsigned i = 0; i < state->primitive_vert_count; i++) {
      /* odd or even triangle within strip emitted by user GS
       * this is handled using the table
       */
      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
      unsigned offset_even = vert_maps[is_triangle][0][i];
      unsigned offset_odd = vert_maps[is_triangle][1][i];
      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                            offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
       * For odd primitives we rotate by 2, combined with the previous rotation this
       * means the second vertex becomes the last.
       */
      if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
        rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
                                            nir_isub(b, three,
                                                        odd_prim)),
                                            three);
      /* Triangles that come from fans are provided to the gs the same way as
       * odd triangles from a strip so always rotate by 2.
       */
      else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
        rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
                                three);
      rotated_i = nir_iadd(b, rotated_i, current_vertex);
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         if (state->varyings[location]) {
            nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
            nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location]), index);
            copy_vars(b, nir_build_deref_var(b, var), value);
         }
      }
      nir_emit_vertex(b);
   }
}

static bool
lower_pv_mode_gs_emit_vertex(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // increment pos_counter
   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

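/* On EndPrimitive, drain the buffered vertices: every window of
 * primitive_vert_count consecutive vertices recorded since the last
 * EndPrimitive is re-emitted as a rotated primitive, then the counters are
 * reset for the next strip.
 */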
static bool
lower_pv_mode_gs_end_primitive(nir_builder *b,
                               nir_intrinsic_instr *intrin,
                               struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_push_loop(b);
   {
      nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
      nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                                nir_imm_int(b, state->primitive_vert_count)));
      nir_jump(b, nir_jump_break);
      nir_pop_if(b, NULL);

      lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
      nir_end_primitive(b);

      nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
   }
   nir_pop_loop(b, NULL);
   /* Set the ring offset such that when position 0 is
    * read we get the last value written
    */
   nir_store_var(b, state->ring_offset, pos_counter, 1);
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
   nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_pv_mode_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_pv_mode_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_pv_mode_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_pv_mode_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static unsigned int
lower_pv_mode_vertices_for_prim(enum shader_prim prim)
{
   switch (prim) {
   case SHADER_PRIM_POINTS:
      return 1;
   case SHADER_PRIM_LINE_STRIP:
      return 2;
   case SHADER_PRIM_TRIANGLE_STRIP:
      return 3;
   default:
      unreachable("unsupported primitive for gs output");
   }
}

static bool
lower_pv_mode_gs(nir_shader *shader, unsigned prim)
{
   nir_builder b;
   struct lower_pv_mode_state state;
   memset(state.varyings, 0, sizeof(state.varyings));

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
   state.ring_size = shader->info.gs.vertices_out;

   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_primverts_%d", location);
      state.varyings[location] =
         nir_local_variable_create(entry,
                                   glsl_array_type(var->type,
                                                   state.ring_size,
                                                   false),
                                   name);
   }

   state.pos_counter = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__pos_counter");

   state.out_pos_counter = nir_local_variable_create(entry,
                                                     glsl_uint_type(),
                                                     "__out_pos_counter");

   state.ring_offset = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__ring_offset");

   state.prim = prim;

   // initialize pos_counter and out_pos_counter
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);

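   /* a strip of N vertices contains N - (verts_per_prim - 1) primitives, and
    * each of them is re-emitted here as a standalone primitive of
    * verts_per_prim vertices
    */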
   shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
                                   (state.primitive_vert_count - 1)) *
                                  state.primitive_vert_count;
   return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
                                       nir_metadata_dominance, &state);
}

struct lower_line_stipple_state {
   nir_variable *pos_out;
   nir_variable *stipple_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *stipple_counter;
   bool line_rectangular;
};

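/* Map a clip-space position to screen space: perspective-divide x/y by w,
 * then apply the viewport scale.
 */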
static nir_ssa_def *
viewport_map(nir_builder *b, nir_ssa_def *vert,
             nir_ssa_def *scale)
{
   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
   nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
                                        w_recip);
   return nir_fmul(b, ndc_point, scale);
}

static bool
lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_line_stipple_state *state = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   // viewport-map endpoints
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   prev = viewport_map(b, prev, vp_scale);
   curr = viewport_map(b, curr, vp_scale);

   // calculate length of line
   nir_ssa_def *len;
   if (state->line_rectangular)
      len = nir_fast_distance(b, prev, curr);
   else {
      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
   }
   // update stipple_counter
   nir_store_var(b, state->stipple_counter,
                    nir_fadd(b, nir_load_var(b, state->stipple_counter),
                                len), 1);
   nir_pop_if(b, NULL);
   // emit stipple out
   nir_copy_var(b, state->stipple_out, state->stipple_counter);
   nir_copy_var(b, state->prev_pos, state->pos_out);

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   return true;
}

static bool
lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
{
   nir_builder b;
   struct lower_line_stipple_state state;

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
                                           glsl_float_type(),
                                           "__stipple");
   state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.stipple_out->data.driver_location = shader->num_outputs++;
   state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");
   state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
                                               glsl_float_type(),
                                               "__stipple_counter");

   state.line_rectangular = line_rectangular;
   // initialize pos_counter and stipple_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);

   return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
                                       nir_metadata_dominance, &state);
}

static bool
lower_line_stipple_fs(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);

   // create stipple counter
   nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
                                               glsl_float_type(),
                                               "__stipple");
   stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   stipple->data.driver_location = shader->num_inputs++;
   stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
   shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);

   nir_variable *sample_mask_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      FRAG_RESULT_SAMPLE_MASK);
   if (!sample_mask_out) {
      sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
                                        glsl_uint_type(), "sample_mask");
      sample_mask_out->data.driver_location = shader->num_outputs++;
      sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
   }

   b.cursor = nir_after_cf_list(&entry->body);

   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                 .base = 1);
   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
   pattern = nir_iand_imm(&b, pattern, 0xffff);

   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
   nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_store_var(&b, v, sample_mask_in, 1);
   nir_store_var(&b, sample_mask, sample_mask_in, 1);
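   /* walk the set bits of the input sample mask: for each covered sample,
    * interpolate the stipple counter at that sample, test the matching bit
    * of the 16-bit pattern, and clear the sample from the output mask when
    * the pattern bit is 0
    */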
   nir_push_loop(&b);
   {
      nir_ssa_def *value = nir_load_var(&b, v);
      nir_ssa_def *index = nir_ufind_msb(&b, value);
      nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
      nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
      nir_store_var(&b, v, new_value,  1);
      nir_push_if(&b, nir_ieq_imm(&b, value, 0));
      nir_jump(&b, nir_jump_break);
      nir_pop_if(&b, NULL);

      nir_ssa_def *stipple_pos =
         nir_interp_deref_at_sample(&b, 1, 32,
            &nir_build_deref_var(&b, stipple)->dest.ssa, index);
      stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                                 nir_imm_float(&b, 16.0));
      stipple_pos = nir_f2i32(&b, stipple_pos);
      nir_ssa_def *bit =
         nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
      nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
      {
         nir_ssa_def *value = nir_load_var(&b, sample_mask);
         value = nir_ixor(&b, value, index_mask);
         nir_store_var(&b, sample_mask, value, 1);
      }
      nir_pop_if(&b, NULL);
   }
   nir_pop_loop(&b, NULL);
   nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);

   return true;
}

struct lower_line_smooth_state {
   nir_variable *pos_out;
   nir_variable *line_coord_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
                *varyings[VARYING_SLOT_MAX][4]; // location_frac
};

static bool
lower_line_smooth_gs_store(nir_builder *b,
                           nir_intrinsic_instr *intrin,
                           struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      // we take care of position elsewhere
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location != VARYING_SLOT_POS) {
         assert(state->varyings[location][location_frac]);
         assert(intrin->src[1].is_ssa);
         nir_store_var(b, state->varyings[location][location_frac],
                       intrin->src[1].ssa,
                       nir_intrinsic_write_mask(intrin));
         nir_instr_remove(&intrin->instr);
         return true;
      }
   }

   return false;
}

static bool
lower_line_smooth_gs_emit_vertex(nir_builder *b,
                                 nir_intrinsic_instr *intrin,
                                 struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);

   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
                                               .base = 1);
   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);

   const unsigned yx[2] = { 1, 0 };
   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
   nir_ssa_def *len = nir_fast_length(b, vec);
   nir_ssa_def *dir = nir_normalize(b, vec);
   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
   half_length = nir_fadd_imm(b, half_length, 0.5);

   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
   nir_ssa_def *tangent =
      nir_fmul(b,
               nir_fmul(b,
                        nir_swizzle(b, dir, yx, 2),
                        nir_imm_vec2(b, 1.0, -1.0)),
               vp_scale_rcp);
   tangent = nir_fmul(b, tangent, half_width);
   tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
   dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);

   nir_ssa_def *line_offsets[8] = {
      nir_fadd(b, tangent, nir_fneg(b, dir)),
      nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
      tangent,
      nir_fneg(b, tangent),
      tangent,
      nir_fneg(b, tangent),
      nir_fadd(b, tangent, dir),
      nir_fadd(b, nir_fneg(b, tangent), dir),
   };
   nir_ssa_def *line_coord =
      nir_vec4(b, half_width, half_width, half_length, half_length);
   nir_ssa_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,  -1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,  -1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   1,  1)),
   };

   /* emit first end-cap, and start line */
   for (int i = 0; i < 4; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->prev_varyings[location][location_frac])
            nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, prev, nir_fmul(b, line_offsets[i],
                             nir_channel(b, prev, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }

   /* finish line and emit last end-cap */
   for (int i = 4; i < 8; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac])
            nir_copy_var(b, var, state->varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, curr, nir_fmul(b, line_offsets[i],
                             nir_channel(b, curr, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }
   nir_end_primitive(b);

   nir_pop_if(b, NULL);

   nir_copy_var(b, state->prev_pos, state->pos_out);
   nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (state->varyings[location][location_frac])
         nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
   }

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(&intrin->instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_line_smooth_gs_end_primitive(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // reset line counter
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_line_smooth_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_line_smooth_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_line_smooth_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_line_smooth_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static bool
lower_line_smooth_gs(nir_shader *shader)
{
   nir_builder b;
   struct lower_line_smooth_state state;

   memset(state.varyings, 0, sizeof(state.varyings));
   memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location == VARYING_SLOT_POS)
         continue;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                              var->type, name);

      snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
      state.prev_varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                              var->type, name);
   }

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.line_coord_out =
      nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
                          "__line_coord");
   state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.line_coord_out->data.driver_location = shader->num_outputs++;
   state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");

   // initialize pos_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;

   return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                       nir_metadata_dominance, &state);
}

static bool
lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
{
   int dummy;
   nir_builder b;

   nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
   if (lower_stipple) {
      stipple_counter = nir_variable_create(shader, nir_var_shader_in,
                                            glsl_float_type(),
                                            "__stipple");
      stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
      stipple_counter->data.driver_location = shader->num_inputs++;
      stipple_counter->data.location =
         MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
      shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);

      stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
                                            glsl_uint_type(),
                                            "stipple_pattern");

      // initialize stipple_pattern
      nir_function_impl *entry = nir_shader_get_entrypoint(shader);
      nir_builder_init(&b, entry);
      b.cursor = nir_before_cf_list(&entry->body);
      nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                   nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                   .base = 1);
      nir_store_var(&b, stipple_pattern, pattern, 1);
   }

   nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
   return true;
}

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}

static bool
lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
   if (alu_instr->op != nir_op_pack_64_2x32 &&
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
   b->cursor = nir_before_instr(&alu_instr->instr);
   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
   nir_ssa_def *dest;
   switch (alu_instr->op) {
   case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
      break;
   case nir_op_unpack_64_2x32:
      dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
      break;
   default:
      unreachable("Impossible opcode");
   }
   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
   nir_instr_remove(&alu_instr->instr);
   return true;
}

static bool
lower_64bit_pack(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
                                       nir_metadata_block_index | nir_metadata_dominance, NULL);
}

nir_shader *
zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
                               const nir_shader *prev_stage)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                  options,
                                                  "filled quad gs");

   nir_shader *nir = b.shader;
   nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 4;
   nir->info.gs.vertices_out = 6;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
   memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
   if (prev_stage->xfb_info) {
      nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
   }

   nir_variable *in_vars[VARYING_SLOT_MAX];
   nir_variable *out_vars[VARYING_SLOT_MAX];
   unsigned num_vars = 0;

   /* Create input/output variables. */
   nir_foreach_shader_out_variable(var, prev_stage) {
      assert(!var->data.patch);

      char name[100];
      if (var->name)
         snprintf(name, sizeof(name), "in_%s", var->name);
      else
         snprintf(name, sizeof(name), "in_%d", var->data.driver_location);

      nir_variable *in = nir_variable_clone(var, nir);
      ralloc_free(in->name);
      in->name = ralloc_strdup(in, name);
      in->type = glsl_array_type(var->type, 4, false);
      in->data.mode = nir_var_shader_in;
      nir_shader_add_variable(nir, in);

      if (var->name)
         snprintf(name, sizeof(name), "out_%s", var->name);
      else
         snprintf(name, sizeof(name), "out_%d", var->data.driver_location);

      nir_variable *out = nir_variable_clone(var, nir);
      ralloc_free(out->name);
      out->name = ralloc_strdup(out, name);
      out->data.mode = nir_var_shader_out;
      nir_shader_add_variable(nir, out);

      in_vars[num_vars] = in;
      out_vars[num_vars++] = out;
   }

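   /* Split the quad into two triangles, choosing the split so that both
    * triangles inherit the quad's provoking vertex: vertex 0 when the first
    * vertex is provoking, vertex 3 when the last one is (selected at runtime
    * through load_provoking_last).
    */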
   int mapping_first[] = {0, 1, 2, 0, 2, 3};
   int mapping_last[] = {0, 1, 3, 1, 2, 3};
   nir_ssa_def *last_pv_vert_def = nir_load_provoking_last(&b);
   last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
   for (unsigned i = 0; i < 6; ++i) {
      /* swap indices 2 and 3 */
      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                   nir_imm_int(&b, mapping_last[i]),
                                   nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
      for (unsigned j = 0; j < num_vars; ++j) {
         if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
            continue;
         }
         nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
         copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
      }
      nir_emit_vertex(&b, 0);
      if (i == 2)
        nir_end_primitive(&b, 0);
   }

   nir_end_primitive(&b, 0);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
   return nir;
}

static bool
lower_system_values_to_inlined_uniforms_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   int inlined_uniform_offset;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_flat_mask:
      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
      break;
   case nir_intrinsic_load_provoking_last:
      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
      break;
   default:
      return false;
   }

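   /* replace the system value with a 32-bit load from ubo 0 at the
    * inlined-uniform offset computed above
    */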
1316    b->cursor = nir_before_instr(&intrin->instr);
1317    nir_ssa_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
1318                                             nir_imm_int(b, inlined_uniform_offset),
1319                                             .align_mul = 4, .align_offset = 0,
1320                                             .range_base = 0, .range = ~0);
1321    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest_def);
1322    nir_instr_remove(instr);
1323    return true;
1324 }
1325
1326 bool
1327 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1328 {
1329    return nir_shader_instructions_pass(nir, lower_system_values_to_inlined_uniforms_instr,
1330                                        nir_metadata_dominance, NULL);
1331 }
1332
1333 void
1334 zink_screen_init_compiler(struct zink_screen *screen)
1335 {
1336    static const struct nir_shader_compiler_options
1337    default_options = {
1338       .lower_ffma16 = true,
1339       .lower_ffma32 = true,
1340       .lower_ffma64 = true,
1341       .lower_scmp = true,
1342       .lower_fdph = true,
1343       .lower_flrp32 = true,
1344       .lower_fpow = true,
1345       .lower_fsat = true,
1346       .lower_extract_byte = true,
1347       .lower_extract_word = true,
1348       .lower_insert_byte = true,
1349       .lower_insert_word = true,
1350
1351       /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1352        * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1353        * ldexp, so we don't just always lower it in NIR).  Given that ldexp is
1354        * effectively unused (no instances in shader-db), it's not worth the
1355        * effort to do so.
1356        */
1357       .lower_ldexp = true,
1358
1359       .lower_mul_high = true,
1360       .lower_rotate = true,
1361       .lower_uadd_carry = true,
1362       .lower_usub_borrow = true,
1363       .lower_uadd_sat = true,
1364       .lower_usub_sat = true,
1365       .lower_vector_cmp = true,
1366       .lower_int64_options = 0,
1367       .lower_doubles_options = 0,
1368       .lower_uniforms_to_ubo = true,
1369       .has_fsub = true,
1370       .has_isub = true,
1371       .has_txs = true,
1372       .lower_mul_2x32_64 = true,
1373       .support_16bit_alu = true, /* not quite what it sounds like */
1374       .max_unroll_iterations = 0,
1375    };
1376
1377    screen->nir_options = default_options;
1378
1379    if (!screen->info.feats.features.shaderInt64)
1380       screen->nir_options.lower_int64_options = ~0;
1381
1382    if (!screen->info.feats.features.shaderFloat64) {
1383       screen->nir_options.lower_doubles_options = ~0;
1384       screen->nir_options.lower_flrp64 = true;
1385       screen->nir_options.lower_ffma64 = true;
1386       /* soft fp64 function inlining will blow up loop bodies and effectively
1387        * stop Vulkan drivers from unrolling the loops.
1388        */
1389       screen->nir_options.max_unroll_iterations_fp64 = 32;
1390    }
1391
1392    /* "The OpFRem and OpFMod instructions use cheap approximations of remainder,
1393     *  and the error can be large due to the discontinuity in trunc() and floor().
1394     *  This can produce mathematically unexpected results in some cases, such as
1395     *  FMod(x,x) computing x rather than 0, and can also cause the result to have
1396     *  a different sign than the infinitely precise result."
1397     *
1398     *     - Table 84. Precision of core SPIR-V Instructions
1399     *
1400     * For drivers that are known to have imprecise fmod for doubles, lower dmod.
1401     */
1402    if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
1403        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1404        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
1405       screen->nir_options.lower_doubles_options = nir_lower_dmod;
1406 }
1407
1408 const void *
1409 zink_get_compiler_options(struct pipe_screen *pscreen,
1410                           enum pipe_shader_ir ir,
1411                           gl_shader_stage shader)
1412 {
1413    assert(ir == PIPE_SHADER_IR_NIR);
1414    return &zink_screen(pscreen)->nir_options;
1415 }
1416
1417 struct nir_shader *
1418 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1419 {
1420    if (zink_debug & ZINK_DEBUG_TGSI) {
1421       fprintf(stderr, "TGSI shader:\n---8<---\n");
1422       tgsi_dump_to_file(tokens, 0, stderr);
1423       fprintf(stderr, "---8<---\n\n");
1424    }
1425
1426    return tgsi_to_nir(tokens, screen, false);
1427 }
1428
1429
1430 static bool
1431 dest_is_64bit(nir_dest *dest, void *state)
1432 {
1433    bool *lower = (bool *)state;
1434    if (dest && (nir_dest_bit_size(*dest) == 64)) {
1435       *lower = true;
1436       return false;
1437    }
1438    return true;
1439 }
1440
1441 static bool
1442 src_is_64bit(nir_src *src, void *state)
1443 {
1444    bool *lower = (bool *)state;
1445    if (src && (nir_src_bit_size(*src) == 64)) {
1446       *lower = true;
1447       return false;
1448    }
1449    return true;
1450 }
1451
1452 static bool
1453 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1454 {
1455    bool lower = false;
1456    /* lower_alu_to_scalar requires the nir_instr to be const, but nir_foreach_*
1457     * doesn't have const variants, so do the ugly const_cast here. */
1458    nir_instr *instr = (nir_instr *)const_instr;
1459
1460    nir_foreach_dest(instr, dest_is_64bit, &lower);
1461    if (lower)
1462       return true;
1463    nir_foreach_src(instr, src_is_64bit, &lower);
1464    return lower;
1465 }
1466
1467 static bool
1468 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1469 {
1470    nir_instr *instr = (nir_instr *)const_instr;
1471    nir_alu_instr *alu = nir_instr_as_alu(instr);
1472    switch (alu->op) {
1473    case nir_op_pack_64_2x32_split:
1474    case nir_op_pack_32_2x16_split:
1475    case nir_op_unpack_32_2x16_split_x:
1476    case nir_op_unpack_32_2x16_split_y:
1477    case nir_op_unpack_64_2x32_split_x:
1478    case nir_op_unpack_64_2x32_split_y:
1479       return true;
1480    default:
1481       break;
1482    }
1483    return false;
1484 }
1485
1486
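/* per-bit-size variables for lowered buffer access; each array is indexed
 * by bit_size >> 4, i.e. 8bit -> 0, 16bit -> 1, 32bit -> 2, 64bit -> 4
 */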
1487 struct bo_vars {
1488    nir_variable *uniforms[5];
1489    nir_variable *ubo[5];
1490    nir_variable *ssbo[5];
1491    uint32_t first_ubo;
1492    uint32_t first_ssbo;
1493 };
1494
1495 static struct bo_vars
1496 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1497 {
1498    struct bo_vars bo;
1499    memset(&bo, 0, sizeof(bo));
1500    if (zs->ubos_used)
1501       bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1502    assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1503    if (zs->ssbos_used)
1504       bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1505    assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1506    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1507       unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1508       if (var->data.mode == nir_var_mem_ssbo) {
1509          assert(!bo.ssbo[idx]);
1510          bo.ssbo[idx] = var;
1511       } else {
1512          if (var->data.driver_location) {
1513             assert(!bo.ubo[idx]);
1514             bo.ubo[idx] = var;
1515          } else {
1516             assert(!bo.uniforms[idx]);
1517             bo.uniforms[idx] = var;
1518          }
1519       }
1520    }
1521    return bo;
1522 }
1523
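/* Delete buffer accesses whose constant offsets are statically out of bounds
 * of the sized struct member: stores are removed outright, and loads are
 * replaced with zero.
 */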
1524 static bool
1525 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1526 {
1527    struct bo_vars *bo = data;
1528    if (instr->type != nir_instr_type_intrinsic)
1529       return false;
1530    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1531    nir_variable *var = NULL;
1532    nir_ssa_def *offset = NULL;
1533    bool is_load = true;
1534    b->cursor = nir_before_instr(instr);
1535
1536    switch (intr->intrinsic) {
1537    case nir_intrinsic_store_ssbo:
1538       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1539       offset = intr->src[2].ssa;
1540       is_load = false;
1541       break;
1542    case nir_intrinsic_load_ssbo:
1543       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1544       offset = intr->src[1].ssa;
1545       break;
1546    case nir_intrinsic_load_ubo:
1547       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1548          var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
1549       else
1550          var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
1551       offset = intr->src[1].ssa;
1552       break;
1553    default:
1554       return false;
1555    }
1556    nir_src offset_src = nir_src_for_ssa(offset);
1557    if (!nir_src_is_const(offset_src))
1558       return false;
1559
1560    unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1561    const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1562    unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1563    bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1564    if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1565       return false;
1566
1567    unsigned rewrites = 0;
1568    nir_ssa_def *result[2];
1569    for (unsigned i = 0; i < intr->num_components; i++) {
1570       if (offset_bytes + i >= size) {
1571          rewrites++;
1572          if (is_load)
1573             result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
1574       }
1575    }
1576    assert(rewrites == intr->num_components);
1577    if (is_load) {
1578       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1579       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1580    }
1581    nir_instr_remove(instr);
1582    return true;
1583 }
1584
1585 static bool
1586 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1587 {
1588    struct bo_vars bo = get_bo_vars(zs, shader);
1589    return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1590 }
1591
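/* the standard NIR optimization loop: repeat until no pass reports progress,
 * running 64bit lowering first each iteration since other passes can expose
 * more instructions to lower
 */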
1592 static void
1593 optimize_nir(struct nir_shader *s, struct zink_shader *zs)
1594 {
1595    bool progress;
1596    do {
1597       progress = false;
1598       if (s->options->lower_int64_options)
1599          NIR_PASS_V(s, nir_lower_int64);
1600       if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1601          NIR_PASS_V(s, lower_64bit_pack);
1602       NIR_PASS_V(s, nir_lower_vars_to_ssa);
1603       NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1604       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1605       NIR_PASS(progress, s, nir_copy_prop);
1606       NIR_PASS(progress, s, nir_opt_remove_phis);
1607       if (s->options->lower_int64_options) {
1608          NIR_PASS(progress, s, nir_lower_64bit_phis);
1609          NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1610       }
1611       NIR_PASS(progress, s, nir_opt_dce);
1612       NIR_PASS(progress, s, nir_opt_dead_cf);
1613       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1614       NIR_PASS(progress, s, nir_opt_cse);
1615       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1616       NIR_PASS(progress, s, nir_opt_algebraic);
1617       NIR_PASS(progress, s, nir_opt_constant_folding);
1618       NIR_PASS(progress, s, nir_opt_undef);
1619       NIR_PASS(progress, s, zink_nir_lower_b2b);
1620       if (zs)
1621          NIR_PASS(progress, s, bound_bo_access, zs);
1622    } while (progress);
1623
1624    do {
1625       progress = false;
1626       NIR_PASS(progress, s, nir_opt_algebraic_late);
1627       if (progress) {
1628          NIR_PASS_V(s, nir_copy_prop);
1629          NIR_PASS_V(s, nir_opt_dce);
1630          NIR_PASS_V(s, nir_opt_cse);
1631       }
1632    } while (progress);
1633 }
1634
1635 /* - copy the lowered fbfetch variable
1636  * - set the new one up as an input attachment for descriptor 0.6
1637  * - load it as an image
1638  * - overwrite the previous load
1639  */
1640 static bool
1641 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1642 {
1643    bool ms = data != NULL;
1644    if (instr->type != nir_instr_type_intrinsic)
1645       return false;
1646    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1647    if (intr->intrinsic != nir_intrinsic_load_deref)
1648       return false;
1649    nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
1650    if (!var->data.fb_fetch_output)
1651       return false;
1652    b->cursor = nir_after_instr(instr);
1653    nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1654    /* If Dim is SubpassData, ... Image Format must be Unknown
1655     * - SPIRV OpTypeImage specification
1656     */
1657    fbfetch->data.image.format = 0;
1658    fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1659    fbfetch->data.mode = nir_var_uniform;
1660    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1662    fbfetch->data.sample = ms;
1663    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1664    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1665    nir_shader_add_variable(b->shader, fbfetch);
1666    nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
1667    nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
1668    nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1669    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1670    return true;
1671 }
1672
1673 static bool
1674 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1675 {
1676    nir_foreach_shader_out_variable(var, shader) {
1677       if (var->data.fb_fetch_output) {
1678          *fbfetch = var;
1679          break;
1680       }
1681    }
1682    assert(*fbfetch);
1683    if (!*fbfetch)
1684       return false;
1685    return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1686 }
1687
1688 /*
1689  * Add a check for out of bounds LOD for every texel fetch op
1690  * It boils down to:
1691  * - if (lod < query_levels(tex))
1692  * -    res = txf(tex)
1693  * - else
1694  * -    res = (0, 0, 0, 1)
1695  */
1696 static bool
1697 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1698 {
1699    if (in->type != nir_instr_type_tex)
1700       return false;
1701    nir_tex_instr *txf = nir_instr_as_tex(in);
1702    if (txf->op != nir_texop_txf)
1703       return false;
1704
1705    b->cursor = nir_before_instr(in);
1706    int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1707    assert(lod_idx >= 0);
1708    nir_src lod_src = txf->src[lod_idx].src;
1709    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1710       return false;
1711
1712    assert(lod_src.is_ssa);
1713    nir_ssa_def *lod = lod_src.ssa;
1714
1715    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1716    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1717    nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1718                                                 !!(offset_idx >= 0) + !!(handle_idx >= 0));
1719    levels->op = nir_texop_query_levels;
1720    levels->texture_index = txf->texture_index;
1721    levels->dest_type = nir_type_int | lod->bit_size;
1722    if (offset_idx >= 0) {
1723       levels->src[0].src_type = nir_tex_src_texture_offset;
1724       nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
1725    }
1726    if (handle_idx >= 0) {
1727       levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
1728       nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
1729    }
1730    nir_ssa_dest_init(&levels->instr, &levels->dest,
1731                      nir_tex_instr_dest_size(levels), 32, NULL);
1732    nir_builder_instr_insert(b, &levels->instr);
1733
1734    nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
1735    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1736    nir_builder_instr_insert(b, &new_txf->instr);
1737
1738    nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1739    nir_const_value oob_values[4] = {0};
1740    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1741    oob_values[3] = (txf->dest_type & nir_type_float) ?
1742                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1743    nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1744
1745    nir_pop_if(b, lod_oob_else);
1746    nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);
1747
1748    nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
1749    nir_instr_remove_v(in);
1750    return true;
1751 }
1752
1753 /* This pass is used to workaround the lack of out of bounds LOD robustness
1754  * for texel fetch ops in VK_EXT_image_robustness.
1755  */
1756 static bool
1757 lower_txf_lod_robustness(nir_shader *shader)
1758 {
1759    return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1760 }
1761
1762 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1763 static bool
1764 check_psiz(struct nir_shader *s)
1765 {
1766    bool have_psiz = false;
1767    nir_foreach_shader_out_variable(var, s) {
1768       if (var->data.location == VARYING_SLOT_PSIZ) {
1769          /* genuine PSIZ outputs will have this set */
1770          have_psiz |= !!var->data.explicit_location;
1771       }
1772    }
1773    return have_psiz;
1774 }
1775
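/* find the output variable covering (location, location_frac); when several
 * variables share a location, prefer the one spanning the most vec4 slots,
 * and ignore a fake PSIZ injected by nir_lower_point_size_mov
 */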
1776 static nir_variable *
1777 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
1778 {
1779    assert((int)location >= 0);
1780
1781    unsigned found = 0;
1782    if (!location_frac && location != VARYING_SLOT_PSIZ) {
1783       nir_foreach_shader_out_variable(var, nir) {
1784          if (var->data.location == location)
1785             found++;
1786       }
1787    }
1788    if (found) {
1789       /* multiple variables found for this location: find the biggest one */
1790       nir_variable *out = NULL;
1791       unsigned slots = 0;
1792       nir_foreach_shader_out_variable(var, nir) {
1793          if (var->data.location == location) {
1794             unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
1795             if (count_slots > slots) {
1796                slots = count_slots;
1797                out = var;
1798             }
1799          }
1800       }
1801       return out;
1802    } else {
1803       /* only one variable found or this is location_frac */
1804       nir_foreach_shader_out_variable(var, nir) {
1805          if (var->data.location == location &&
1806              (var->data.location_frac == location_frac ||
1807               (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
1808             if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
1809                return var;
1810          }
1811       }
1812    }
1813    return NULL;
1814 }
1815
1816 static bool
1817 is_inlined(const bool *inlined, const struct pipe_stream_output *output)
1818 {
1819    for (unsigned i = 0; i < output->num_components; i++)
1820       if (!inlined[output->start_component + i])
1821          return false;
1822    return true;
1823 }
1824
1825 static void
1826 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1827 {
1828    uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1829    if (last_output < VARYING_SLOT_VAR0)
1830       last_output = VARYING_SLOT_VAR0;
1831    else
1832       last_output++;
1833    /* this should get fixed up by slot remapping */
1834    psiz->data.location = last_output;
1835 }
1836
1837 static const struct glsl_type *
1838 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1839 {
1840    /* could be dvec/dmat/mat: each member is the same */
1841    const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1842    /* determine size of each member type */
1843    unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1844    /* normalize slot idx to current type's size */
1845    slot %= slot_count;
1846    unsigned slot_components = glsl_get_components(plain);
1847    if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1848       slot_components *= 2;
1849    /* create a vec4 mask of the selected slot's components out of all the components */
1850    uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1851    /* return a vecN of the selected components */
1852    slot_components = util_bitcount(mask);
1853    return glsl_vec_type(slot_components);
1854 }
1855
1856 static const struct glsl_type *
1857 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1858 {
1859    const struct glsl_type *type = slot_type;
1860    unsigned slot_count = 0;
1861    unsigned cur_slot = 0;
1862    /* iterate over all the members in the struct, stopping once the slot idx is reached */
1863    for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1864       /* use array type for slot counting but return array member type for unroll */
1865       const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1866       type = glsl_without_array(arraytype);
1867       slot_count = glsl_count_vec4_slots(arraytype, false, false);
1868    }
1869    *slot_idx -= (cur_slot - slot_count);
1870    if (!glsl_type_is_struct_or_ifc(type))
1871       /* this is a fully unrolled struct: find the number of vec components to output */
1872       type = clamp_slot_type(type, *slot_idx);
1873    return type;
1874 }
1875
1876 static unsigned
1877 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1878 {
1879    assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1880    const struct glsl_type *orig_type = var->type;
1881    const struct glsl_type *type = glsl_without_array(var->type);
1882    unsigned slot_idx = slot - so_slot;
1883    if (type != orig_type)
1884       slot_idx %= glsl_count_vec4_slots(type, false, false);
1885    /* need to find the vec4 that's being exported by this slot */
1886    while (glsl_type_is_struct_or_ifc(type))
1887       type = unroll_struct_type(type, &slot_idx);
1888
1889    /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1890    unsigned num_components = glsl_get_components(glsl_without_array(type));
1891    /* special handling: clip/cull distance are arrays with vector semantics */
1892    if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1893       num_components = glsl_array_size(type);
1894       if (slot_idx)
1895          /* this is the second vec4 */
1896          num_components %= 4;
1897       else
1898          /* this is the first vec4 */
1899          num_components = MIN2(num_components, 4);
1900    }
1901    assert(num_components);
1902    /* gallium handles xfb in terms of 32bit units */
1903    if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1904       num_components *= 2;
1905    return num_components;
1906 }
1907
1908 static const struct pipe_stream_output *
1909 find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
1910 {
1911    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1912       const struct pipe_stream_output *packed_output = &so_info->output[i];
1913       if (reverse_map[packed_output->register_index] == slot)
1914          return packed_output;
1915    }
1916    return NULL;
1917 }
1918
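/* Map gallium's pipe_stream_output_info onto the shader's output variables:
 * outputs that cover an entire variable are inlined as explicit xfb info on
 * the variable, packed outputs are consolidated where streams/buffers/offsets
 * allow, and whatever can't be inlined is stashed in zs->sinfo for later
 * handling.
 */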
1919 static void
1920 update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
1921                uint64_t outputs_written, bool have_psiz)
1922 {
1923    uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
1924    unsigned slot = 0;
1925    /* semi-copied from iris */
1926    while (outputs_written) {
1927       int bit = u_bit_scan64(&outputs_written);
1928       /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
1929       if (bit == VARYING_SLOT_PSIZ && !have_psiz)
1930          continue;
1931       reverse_map[slot++] = bit;
1932    }
1933
1934    bool have_fake_psiz = false;
1935    nir_foreach_shader_out_variable(var, nir) {
1936       if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
1937          have_fake_psiz = true;
1938    }
1939
1940    bool inlined[VARYING_SLOT_MAX][4] = {0};
1941    uint64_t packed = 0;
1942    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1943    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1944    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1945    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1946    nir_variable *psiz = NULL;
1947    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1948       const struct pipe_stream_output *output = &so_info->output[i];
1949       unsigned slot = reverse_map[output->register_index];
1950       /* always set stride to be used during draw */
1951       zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
1952       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1953          nir_variable *var = NULL;
1954          unsigned so_slot;
1955          while (!var)
1956             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1957          if (var->data.location == VARYING_SLOT_PSIZ)
1958             psiz = var;
1959          so_slot = slot + 1;
1960          slot = reverse_map[output->register_index];
1961          if (var->data.explicit_xfb_buffer) {
1962             /* handle dvec3 where gallium splits streamout over 2 registers */
1963             for (unsigned j = 0; j < output->num_components; j++)
1964                inlined[slot][output->start_component + j] = true;
1965          }
1966          if (is_inlined(inlined[slot], output))
1967             continue;
1968          bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
1969          unsigned num_components = get_slot_components(var, slot, so_slot);
1970          /* if this is the entire variable, try to blast it out during the initial declaration
1971           * structs must be handled later to ensure accurate analysis
1972           */
1973          if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
1974             var->data.explicit_xfb_buffer = 1;
1975             var->data.xfb.buffer = output->output_buffer;
1976             var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1977             var->data.offset = output->dst_offset * 4;
1978             var->data.stream = output->stream;
1979             for (unsigned j = 0; j < output->num_components; j++)
1980                inlined[slot][output->start_component + j] = true;
1981          } else {
1982             /* otherwise store some metadata for later */
1983             packed |= BITFIELD64_BIT(slot);
1984             packed_components[slot] += output->num_components;
1985             packed_streams[slot] |= BITFIELD_BIT(output->stream);
1986             packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
1987             for (unsigned j = 0; j < output->num_components; j++)
1988                packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
1989          }
1990       }
1991    }
1992
1993    /* if this was flagged as a packed output before, and if all the components are
1994     * being output with the same stream on the same buffer with increasing offsets, this entire variable
1995     * can be consolidated into a single output to conserve locations
1996     */
1997    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1998       const struct pipe_stream_output *output = &so_info->output[i];
1999       unsigned slot = reverse_map[output->register_index];
2000       if (is_inlined(inlined[slot], output))
2001          continue;
2002       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
2003          nir_variable *var = NULL;
2004          while (!var)
2005             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
2006          /* this is a lowered 64bit variable that can't be exported due to packing */
2007          if (var->data.is_xfb)
2008             goto out;
2009
2010          unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
2011          /* for each variable, iterate over all the variable's slots and inline the outputs */
2012          for (unsigned j = 0; j < num_slots; j++) {
2013             slot = var->data.location + j;
2014             const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
2015             if (!packed_output)
2016                goto out;
2017
2018             /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
2019             if (!(packed & BITFIELD64_BIT(slot)) ||
2020                 util_bitcount(packed_streams[slot]) != 1 ||
2021                 util_bitcount(packed_buffers[slot]) != 1)
2022                goto out;
2023
2024             /* if all the components the variable exports to this slot aren't captured, skip consolidation */
2025             unsigned num_components = get_slot_components(var, slot, var->data.location);
2026             if (num_components != packed_components[slot])
2027                goto out;
2028
2029             /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
2030             uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
2031             for (unsigned k = 1; k < num_components; k++) {
2032                /* if the offsets are not incrementing as expected, skip consolidation */
2033                if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
2034                   goto out;
2035                prev_offset = packed_offsets[packed_output->register_index][k];
2036             }
2037          }
2038          /* this output can be consolidated: blast out all the data inlined */
2039          var->data.explicit_xfb_buffer = 1;
2040          var->data.xfb.buffer = output->output_buffer;
2041          var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
2042          var->data.offset = output->dst_offset * 4;
2043          var->data.stream = output->stream;
2044          /* GLSL specifies that interface blocks are split per-buffer in XFB */
2045          if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
2046             zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
2047          /* mark all slot components inlined to skip subsequent loop iterations */
2048          for (unsigned j = 0; j < num_slots; j++) {
2049             slot = var->data.location + j;
2050             for (unsigned k = 0; k < packed_components[slot]; k++)
2051                inlined[slot][k] = true;
2052             packed &= ~BITFIELD64_BIT(slot);
2053          }
2054          continue;
2055       }
2056 out:
2057       /* these are packed/explicit varyings which can't be exported with normal output */
2058       zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
2059       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
2060       zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
2061    }
2062    zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
2063    /* ensure this doesn't get output in the shader by unsetting location */
2064    if (have_fake_psiz && psiz)
2065       update_psiz_location(nir, psiz);
2066 }
2067
2068 struct decompose_state {
2069   nir_variable **split;
2070   bool needs_w;
2071 };
2072
2073 static bool
2074 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2075 {
2076    struct decompose_state *state = data;
2077    nir_variable **split = state->split;
2078    if (instr->type != nir_instr_type_intrinsic)
2079       return false;
2080    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2081    if (intr->intrinsic != nir_intrinsic_load_deref)
2082       return false;
2083    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2084    nir_variable *var = nir_deref_instr_get_variable(deref);
2085    if (var != split[0])
2086       return false;
2087    unsigned num_components = glsl_get_vector_elements(split[0]->type);
2088    b->cursor = nir_after_instr(instr);
2089    nir_ssa_def *loads[4];
2090    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2091       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2092    if (state->needs_w) {
2093       /* oob load of the w component to get the correct value for int/float */
2094       loads[3] = nir_channel(b, loads[0], 3);
2095       loads[0] = nir_channel(b, loads[0], 0);
2096    }
2097    nir_ssa_def *new_load = nir_vec(b, loads, num_components);
2098    nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
2099    nir_instr_remove_v(instr);
2100    return true;
2101 }
2102
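/* split each decomposed vertex attribute into per-component input variables
 * (allocated from unused driver locations) and rewrite loads of the original
 * variable as a vec of per-component loads
 */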
2103 static bool
2104 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2105 {
2106    uint32_t bits = 0;
2107    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2108       bits |= BITFIELD_BIT(var->data.driver_location);
2109    bits = ~bits;
2110    u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2111       nir_variable *split[5];
2112       struct decompose_state state;
2113       state.split = split;
2114       nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2115       assert(var);
2116       split[0] = var;
2117       bits |= BITFIELD_BIT(var->data.driver_location);
2118       const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2119       unsigned num_components = glsl_get_vector_elements(var->type);
2120       state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2121       for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2122          split[i+1] = nir_variable_clone(var, nir);
2123          split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2124          if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2125             split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2126          else
2127             split[i+1]->type = new_type;
2128          split[i+1]->data.driver_location = ffs(bits) - 1;
2129          bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2130          nir_shader_add_variable(nir, split[i+1]);
2131       }
2132       var->data.mode = nir_var_shader_temp;
2133       nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2134    }
2135    nir_fixup_deref_modes(nir);
2136    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2137    optimize_nir(nir, NULL);
2138    return true;
2139 }
2140
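/* Convert byte-based buffer offsets into typed array indices, i.e.
 * offset / (bit_size / 8): e.g. a 64bit access at byte offset 24 becomes
 * index 3. When shaderInt64 is unavailable, 64bit loads/stores are instead
 * split into two 32bit ops packed/unpacked with 64_2x32.
 */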
2141 static bool
2142 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2143 {
2144    struct zink_screen *screen = data;
2145    const bool has_int64 = screen->info.feats.features.shaderInt64;
2146    if (instr->type != nir_instr_type_intrinsic)
2147       return false;
2148    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2149    b->cursor = nir_before_instr(instr);
2150    switch (intr->intrinsic) {
2151    case nir_intrinsic_ssbo_atomic_fadd:
2152    case nir_intrinsic_ssbo_atomic_add:
2153    case nir_intrinsic_ssbo_atomic_umin:
2154    case nir_intrinsic_ssbo_atomic_imin:
2155    case nir_intrinsic_ssbo_atomic_umax:
2156    case nir_intrinsic_ssbo_atomic_imax:
2157    case nir_intrinsic_ssbo_atomic_and:
2158    case nir_intrinsic_ssbo_atomic_or:
2159    case nir_intrinsic_ssbo_atomic_xor:
2160    case nir_intrinsic_ssbo_atomic_exchange:
2161    case nir_intrinsic_ssbo_atomic_comp_swap: {
2162       /* convert offset to uintN_t[idx] */
2163       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
2164       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2165       return true;
2166    }
2167    case nir_intrinsic_load_ssbo:
2168    case nir_intrinsic_load_ubo: {
2169       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2170       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2171                         nir_src_is_const(intr->src[0]) &&
2172                         nir_src_as_uint(intr->src[0]) == 0 &&
2173                         nir_dest_bit_size(intr->dest) == 64 &&
2174                         nir_intrinsic_align_offset(intr) % 8 != 0;
2175       force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2176       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2177       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2178       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2179       if (force_2x32) {
2180          /* this is always scalarized */
2181          assert(intr->dest.ssa.num_components == 1);
2182          /* rewrite as 2x32 */
2183          nir_ssa_def *load[2];
2184          for (unsigned i = 0; i < 2; i++) {
2185             if (intr->intrinsic == nir_intrinsic_load_ssbo)
2186                load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2187             else
2188                load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2189             nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2190          }
2191          /* cast back to 64bit */
2192          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2193          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2194          nir_instr_remove(instr);
2195       }
2196       return true;
2197    }
2198    case nir_intrinsic_load_shared:
2199       b->cursor = nir_before_instr(instr);
2200       bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2201       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2202       nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
2203       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2204       if (force_2x32) {
2205          /* this is always scalarized */
2206          assert(intr->dest.ssa.num_components == 1);
2207          /* rewrite as 2x32 */
2208          nir_ssa_def *load[2];
2209          for (unsigned i = 0; i < 2; i++)
2210             load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2211          /* cast back to 64bit */
2212          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2213          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2214          nir_instr_remove(instr);
2215          return true;
2216       }
2217       break;
2218    case nir_intrinsic_store_ssbo: {
2219       b->cursor = nir_before_instr(instr);
2220       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2221       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2222       nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
2223       /* if 64bit isn't supported, 64bit stores definitely aren't supported either, so rewrite as 2x32 with split and pray */
2224       if (force_2x32) {
2225          /* this is always scalarized */
2226          assert(intr->src[0].ssa->num_components == 1);
2227          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2228          for (unsigned i = 0; i < 2; i++)
2229             nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2230          nir_instr_remove(instr);
2231       }
2232       return true;
2233    }
2234    case nir_intrinsic_store_shared: {
2235       b->cursor = nir_before_instr(instr);
2236       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2237       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2238       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2239       /* if 64bit isn't supported, 64bit stores definitely aren't supported either, so rewrite as 2x32 with split and pray */
2240       if (force_2x32) {
2241          /* this is always scalarized */
2242          assert(intr->src[0].ssa->num_components == 1);
2243          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2244          for (unsigned i = 0; i < 2; i++)
2245             nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2246          nir_instr_remove(instr);
2247       }
2248       return true;
2249    }
2250    default:
2251       break;
2252    }
2253    return false;
2254 }
2255
2256 static bool
2257 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2258 {
2259    return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2260 }
2261
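/* return (creating on first use) the bo variable matching this access's bit
 * size; new variants are cloned from the 32bit variable and retyped roughly
 * as
 *
 *    struct { uintN_t base[len]; uintN_t unsized[]; } name[array_size];
 *
 * with 'base' sized to cover the original array and 'unsized' as the
 * trailing runtime array
 */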
2262 static nir_variable *
2263 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2264 {
2265    nir_variable *var, **ptr;
2266    unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2267
2268    if (ssbo)
2269       ptr = &bo->ssbo[bit_size >> 4];
2270    else {
2271       if (!idx) {
2272          ptr = &bo->uniforms[bit_size >> 4];
2273       } else
2274          ptr = &bo->ubo[bit_size >> 4];
2275    }
2276    var = *ptr;
2277    if (!var) {
2278       if (ssbo)
2279          var = bo->ssbo[32 >> 4];
2280       else {
2281          if (!idx)
2282             var = bo->uniforms[32 >> 4];
2283          else
2284             var = bo->ubo[32 >> 4];
2285       }
2286       var = nir_variable_clone(var, shader);
2287       if (ssbo)
2288          var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2289       else
2290          var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2291       *ptr = var;
2292       nir_shader_add_variable(shader, var);
2293
2294       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2295       fields[0].name = ralloc_strdup(shader, "base");
2296       fields[1].name = ralloc_strdup(shader, "unsized");
2297       unsigned array_size = glsl_get_length(var->type);
2298       const struct glsl_type *bare_type = glsl_without_array(var->type);
2299       const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2300       unsigned length = glsl_get_length(array_type);
2301       const struct glsl_type *type;
2302       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2303       if (bit_size > 32) {
2304          assert(bit_size == 64);
2305          type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2306       } else {
2307          type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2308       }
2309       fields[0].type = type;
2310       fields[1].type = unsized;
2311       var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2312       var->data.driver_location = idx;
2313    }
2314    return var;
2315 }
2316
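/* translate an ssbo atomic intrinsic to its deref_atomic_* counterpart
 * operating on the typed bo variable, emitting one deref atomic per
 * component of the result
 */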
2317 static void
2318 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2319 {
2320    nir_intrinsic_op op;
2321    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2322    switch (intr->intrinsic) {
2323    case nir_intrinsic_ssbo_atomic_fadd:
2324       op = nir_intrinsic_deref_atomic_fadd;
2325       break;
2326    case nir_intrinsic_ssbo_atomic_fmin:
2327       op = nir_intrinsic_deref_atomic_fmin;
2328       break;
2329    case nir_intrinsic_ssbo_atomic_fmax:
2330       op = nir_intrinsic_deref_atomic_fmax;
2331       break;
2332    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2333       op = nir_intrinsic_deref_atomic_fcomp_swap;
2334       break;
2335    case nir_intrinsic_ssbo_atomic_add:
2336       op = nir_intrinsic_deref_atomic_add;
2337       break;
2338    case nir_intrinsic_ssbo_atomic_umin:
2339       op = nir_intrinsic_deref_atomic_umin;
2340       break;
2341    case nir_intrinsic_ssbo_atomic_imin:
2342       op = nir_intrinsic_deref_atomic_imin;
2343       break;
2344    case nir_intrinsic_ssbo_atomic_umax:
2345       op = nir_intrinsic_deref_atomic_umax;
2346       break;
2347    case nir_intrinsic_ssbo_atomic_imax:
2348       op = nir_intrinsic_deref_atomic_imax;
2349       break;
2350    case nir_intrinsic_ssbo_atomic_and:
2351       op = nir_intrinsic_deref_atomic_and;
2352       break;
2353    case nir_intrinsic_ssbo_atomic_or:
2354       op = nir_intrinsic_deref_atomic_or;
2355       break;
2356    case nir_intrinsic_ssbo_atomic_xor:
2357       op = nir_intrinsic_deref_atomic_xor;
2358       break;
2359    case nir_intrinsic_ssbo_atomic_exchange:
2360       op = nir_intrinsic_deref_atomic_exchange;
2361       break;
2362    case nir_intrinsic_ssbo_atomic_comp_swap:
2363       op = nir_intrinsic_deref_atomic_comp_swap;
2364       break;
2365    default:
2366       unreachable("unknown intrinsic");
2367    }
2368    nir_ssa_def *offset = intr->src[1].ssa;
2369    nir_src *src = &intr->src[0];
2370    nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2371    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2372    nir_ssa_def *idx = src->ssa;
2373    if (bo->first_ssbo)
2374       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2375    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2376    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2377
2378    /* generate new atomic deref ops for every component */
2379    nir_ssa_def *result[4];
2380    unsigned num_components = nir_dest_num_components(intr->dest);
2381    for (unsigned i = 0; i < num_components; i++) {
2382       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2383       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2384       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
2385       new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2386       /* deref ops have no offset src, so copy the srcs after it */
2387       for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
2388          nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
2389       nir_builder_instr_insert(b, &new_instr->instr);
2390
2391       result[i] = &new_instr->dest.ssa;
2392       offset = nir_iadd_imm(b, offset, 1);
2393    }
2394
2395    nir_ssa_def *load = nir_vec(b, result, num_components);
2396    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2397    nir_instr_remove(instr);
2398 }
2399
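/* rewrite all remaining ubo/ssbo intrinsics as deref load/store on the typed
 * variables from get_bo_var; atomics are handed off to
 * rewrite_atomic_ssbo_instr above
 */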
2400 static bool
2401 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2402 {
2403    struct bo_vars *bo = data;
2404    if (instr->type != nir_instr_type_intrinsic)
2405       return false;
2406    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2407    nir_variable *var = NULL;
2408    nir_ssa_def *offset = NULL;
2409    bool is_load = true;
2410    b->cursor = nir_before_instr(instr);
2411    nir_src *src;
2412    bool ssbo = true;
2413    switch (intr->intrinsic) {
2414    case nir_intrinsic_ssbo_atomic_fadd:
2415    case nir_intrinsic_ssbo_atomic_fmin:
2416    case nir_intrinsic_ssbo_atomic_fmax:
2417    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2418    case nir_intrinsic_ssbo_atomic_add:
2419    case nir_intrinsic_ssbo_atomic_umin:
2420    case nir_intrinsic_ssbo_atomic_imin:
2421    case nir_intrinsic_ssbo_atomic_umax:
2422    case nir_intrinsic_ssbo_atomic_imax:
2423    case nir_intrinsic_ssbo_atomic_and:
2424    case nir_intrinsic_ssbo_atomic_or:
2425    case nir_intrinsic_ssbo_atomic_xor:
2426    case nir_intrinsic_ssbo_atomic_exchange:
2427    case nir_intrinsic_ssbo_atomic_comp_swap:
2428       rewrite_atomic_ssbo_instr(b, instr, bo);
2429       return true;
2430    case nir_intrinsic_store_ssbo:
2431       src = &intr->src[1];
2432       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2433       offset = intr->src[2].ssa;
2434       is_load = false;
2435       break;
2436    case nir_intrinsic_load_ssbo:
2437       src = &intr->src[0];
2438       var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2439       offset = intr->src[1].ssa;
2440       break;
2441    case nir_intrinsic_load_ubo:
2442       src = &intr->src[0];
2443       var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
2444       offset = intr->src[1].ssa;
2445       ssbo = false;
2446       break;
2447    default:
2448       return false;
2449    }
2450    assert(var);
2451    assert(offset);
2452    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2453    nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2454    if (!ssbo && bo->first_ubo && var->data.driver_location)
2455       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2456    else if (ssbo && bo->first_ssbo)
2457       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2458    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
2459    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2460    assert(intr->num_components <= 2);
2461    if (is_load) {
2462       nir_ssa_def *result[2];
2463       for (unsigned i = 0; i < intr->num_components; i++) {
2464          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2465          result[i] = nir_load_deref(b, deref_arr);
2466          if (intr->intrinsic == nir_intrinsic_load_ssbo)
2467             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2468          offset = nir_iadd_imm(b, offset, 1);
2469       }
2470       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
2471       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2472    } else {
2473       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2474       nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2475    }
2476    nir_instr_remove(instr);
2477    return true;
2478 }
2479
2480 static bool
2481 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2482 {
2483    struct bo_vars bo = get_bo_vars(zs, shader);
2484    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2485 }
2486
2487 static bool
2488 find_var_deref(nir_shader *nir, nir_variable *var)
2489 {
2490    nir_foreach_function(function, nir) {
2491       if (!function->impl)
2492          continue;
2493
2494       nir_foreach_block(block, function->impl) {
2495          nir_foreach_instr(instr, block) {
2496             if (instr->type != nir_instr_type_deref)
2497                continue;
2498             nir_deref_instr *deref = nir_instr_as_deref(instr);
2499             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2500                return true;
2501          }
2502       }
2503    }
2504    return false;
2505 }
2506
2507 struct clamp_layer_output_state {
2508    nir_variable *original;
2509    nir_variable *clamped;
2510 };
2511
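/* load the "framebuffer is layered" flag from the gfx push constants and
 * write either the original gl_Layer value or 0 to the clamped output
 */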
2512 static void
2513 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2514 {
2515    nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
2516                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
2517                                                     .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
2518    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2519    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2520    nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2521                                   nir_load_deref(b, original_deref),
2522                                   nir_imm_int(b, 0));
2523    nir_store_deref(b, clamped_deref, layer, 0);
2524 }
2525
2526 static bool
2527 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2528 {
2529    struct clamp_layer_output_state *state = data;
2530    switch (instr->type) {
2531    case nir_instr_type_intrinsic: {
2532       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2533       if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2534           intr->intrinsic != nir_intrinsic_emit_vertex)
2535          return false;
2536       b->cursor = nir_before_instr(instr);
2537       clamp_layer_output_emit(b, state);
2538       return true;
2539    }
2540    default: return false;
2541    }
2542 }
2543
2544 static bool
2545 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2546 {
2547    switch (vs->info.stage) {
2548    case MESA_SHADER_VERTEX:
2549    case MESA_SHADER_GEOMETRY:
2550    case MESA_SHADER_TESS_EVAL:
2551       break;
2552    default:
2553       unreachable("invalid last vertex stage!");
2554    }
2555    struct clamp_layer_output_state state = {0};
2556    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2557    if (!state.original || !find_var_deref(vs, state.original))
2558       return false;
2559    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2560    state.clamped->data.location = VARYING_SLOT_LAYER;
2561    nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2562    if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2563       state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2564       state.original->data.driver_location = (*next_location)++;
2565       if (fs_var) {
2566          fs_var->data.location = state.original->data.location;
2567          fs_var->data.driver_location = state.original->data.driver_location;
2568       }
2569    } else {
2570       if (state.original->data.explicit_xfb_buffer) {
2571          /* Will xfb the clamped output but still better than nothing */
2572          state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2573          state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2574          state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2575          state.clamped->data.offset = state.original->data.offset;
2576          state.clamped->data.stream = state.original->data.stream;
2577       }
2578       state.original->data.mode = nir_var_shader_temp;
2579       nir_fixup_deref_modes(vs);
2580    }
2581    if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2582       nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2583    } else {
2584       nir_builder b;
2585       nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2586       nir_builder_init(&b, impl);
2587       assert(impl->end_block->predecessors->entries == 1);
2588       b.cursor = nir_after_cf_list(&impl->body);
2589       clamp_layer_output_emit(&b, &state);
2590       nir_metadata_preserve(impl, nir_metadata_dominance);
2591    }
2592    optimize_nir(vs, NULL);
2593    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2594    return true;
2595 }
2596
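/* assign driver_locations to producer outputs: built-ins get a sentinel
 * (UINT_MAX) so they're skipped when counting, and generic varyings are packed
 * into sequential slots recorded in slot_map for the consumer side to match
 */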
2597 static void
2598 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2599 {
2600    unsigned slot = var->data.location;
2601    switch (slot) {
2602    case -1:
2603    case VARYING_SLOT_POS:
2604    case VARYING_SLOT_PNTC:
2605    case VARYING_SLOT_PSIZ:
2606    case VARYING_SLOT_LAYER:
2607    case VARYING_SLOT_PRIMITIVE_ID:
2608    case VARYING_SLOT_CLIP_DIST0:
2609    case VARYING_SLOT_CULL_DIST0:
2610    case VARYING_SLOT_VIEWPORT:
2611    case VARYING_SLOT_FACE:
2612    case VARYING_SLOT_TESS_LEVEL_OUTER:
2613    case VARYING_SLOT_TESS_LEVEL_INNER:
2614       /* use a sentinel value to avoid counting later */
2615       var->data.driver_location = UINT_MAX;
2616       break;
2617
2618    default:
2619       if (var->data.patch) {
2620          assert(slot >= VARYING_SLOT_PATCH0);
2621          slot -= VARYING_SLOT_PATCH0;
2622       }
2623       if (slot_map[slot] == 0xff) {
2624          assert(*reserved < MAX_VARYING);
2625          unsigned num_slots;
2626          if (nir_is_arrayed_io(var, stage))
2627             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2628          else
2629             num_slots = glsl_count_vec4_slots(var->type, false, false);
2630          assert(*reserved + num_slots <= MAX_VARYING);
2631          for (unsigned i = 0; i < num_slots; i++)
2632             slot_map[slot + i] = (*reserved)++;
2633       }
2634       slot = slot_map[slot];
2635       assert(slot < MAX_VARYING);
2636       var->data.driver_location = slot;
2637    }
2638 }
2639
2640 ALWAYS_INLINE static bool
2641 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2642 {
2643    if (stage != MESA_SHADER_FRAGMENT)
2644       return false;
2645    return var->data.location >= VARYING_SLOT_TEX0 && 
2646           var->data.location <= VARYING_SLOT_TEX7;
2647 }
2648
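/* match consumer inputs against the producer's slot_map; returns false for
 * dead io (a varying the producer never wrote) so the caller can rewrite or
 * drop the variable; texcoords are kept alive for GL_COORD_REPLACE handling
 */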
2649 static bool
2650 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2651 {
2652    unsigned slot = var->data.location;
2653    switch (slot) {
2654    case VARYING_SLOT_POS:
2655    case VARYING_SLOT_PNTC:
2656    case VARYING_SLOT_PSIZ:
2657    case VARYING_SLOT_LAYER:
2658    case VARYING_SLOT_PRIMITIVE_ID:
2659    case VARYING_SLOT_CLIP_DIST0:
2660    case VARYING_SLOT_CULL_DIST0:
2661    case VARYING_SLOT_VIEWPORT:
2662    case VARYING_SLOT_FACE:
2663    case VARYING_SLOT_TESS_LEVEL_OUTER:
2664    case VARYING_SLOT_TESS_LEVEL_INNER:
2665       /* use a sentinel value to avoid counting later */
2666       var->data.driver_location = UINT_MAX;
2667       break;
2668    default:
2669       if (var->data.patch) {
2670          assert(slot >= VARYING_SLOT_PATCH0);
2671          slot -= VARYING_SLOT_PATCH0;
2672       }
2673       if (slot_map[slot] == (unsigned char)-1) {
2674          /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2675           * so keep for now and eliminate later
2676           */
2677          if (is_texcoord(stage, var)) {
2678             var->data.driver_location = -1;
2679             return true;
2680          }
2681          if (stage != MESA_SHADER_TESS_CTRL)
2682             /* dead io */
2683             return false;
2684          /* patch variables may be read in the workgroup */
2685          slot_map[slot] = (*reserved)++;
2686       }
2687       var->data.driver_location = slot_map[slot];
2688    }
2689    return true;
2690 }
2691
2692
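/* replace all loads of the given variable with a constant: zero, except for
 * color inputs, which read back the GL default of (0,0,0,1)
 */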
2693 static bool
2694 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2695 {
2696    nir_variable *var = data;
2697    if (instr->type != nir_instr_type_intrinsic)
2698       return false;
2699
2700    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2701    if (intr->intrinsic != nir_intrinsic_load_deref)
2702       return false;
2703    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
2704    if (deref_var != var)
2705       return false;
2706    b->cursor = nir_before_instr(instr);
2707    nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
2708    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2709       switch (var->data.location) {
2710       case VARYING_SLOT_COL0:
2711       case VARYING_SLOT_COL1:
2712       case VARYING_SLOT_BFC0:
2713       case VARYING_SLOT_BFC1:
2714          /* default color is 0,0,0,1 */
2715          if (nir_dest_num_components(intr->dest) == 4)
2716             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2717          break;
2718       default:
2719          break;
2720       }
2721    }
2722    nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
2723    nir_instr_remove(instr);
2724    return true;
2725 }
2726
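/* pair up producer outputs with consumer inputs using a shared slot_map so
 * both sides agree on driver_locations; unmatched consumer inputs are
 * rewritten to read constants, and gl_Layer is clamped for unlayered
 * framebuffers when the driver workaround requires it
 */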
2727 void
2728 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2729 {
2730    unsigned reserved = 0;
2731    unsigned char slot_map[VARYING_SLOT_MAX];
2732    memset(slot_map, -1, sizeof(slot_map));
2733    bool do_fixup = false;
2734    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2735    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2736       /* remove injected pointsize from all but the last vertex stage */
2737       nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2738       if (var && !var->data.explicit_location) {
2739          var->data.mode = nir_var_shader_temp;
2740          nir_fixup_deref_modes(producer);
2741          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2742          optimize_nir(producer, NULL);
2743       }
2744    }
2745    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2746       /* never assign from tcs -> tes, always invert */
2747       nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
2748          assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
2749       nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
2750          if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
2751             /* this is an output, nothing more needs to be done for it to be dropped */
2752             do_fixup = true;
2753       }
2754    } else {
2755       nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
2756          assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
2757       nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
2758          if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
2759             do_fixup = true;
2760             /* input needs to be rewritten */
2761             nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
2762          }
2763       }
2764       if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2765          do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2766    }
2767    if (!do_fixup)
2768       return;
2769    nir_fixup_deref_modes(nir);
2770    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2771    optimize_nir(nir, NULL);
2772 }
2773
2774 /* all types that hit this function contain something that is 64bit */
2775 static const struct glsl_type *
2776 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2777 {
2778    if (glsl_type_is_array(type)) {
2779       const struct glsl_type *child = glsl_get_array_element(type);
2780       unsigned elements = glsl_array_size(type);
2781       unsigned stride = glsl_get_explicit_stride(type);
2782       return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2783    }
2784    /* rewrite structs recursively */
2785    if (glsl_type_is_struct_or_ifc(type)) {
2786       unsigned nmembers = glsl_get_length(type);
2787       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2788       unsigned xfb_offset = 0;
2789       for (unsigned i = 0; i < nmembers; i++) {
2790          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2791          fields[i] = *f;
2792          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2793          if (i < nmembers - 1 && xfb_offset % 8 &&
2794              (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2795               (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2796             var->data.is_xfb = true;
2797          }
2798          fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2799       }
2800       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2801    }
2802    if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2803       return type;
2804    if (doubles_only && glsl_type_is_vector_or_scalar(type))
2805       return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2806    enum glsl_base_type base_type;
2807    switch (glsl_get_base_type(type)) {
2808    case GLSL_TYPE_UINT64:
2809       base_type = GLSL_TYPE_UINT;
2810       break;
2811    case GLSL_TYPE_INT64:
2812       base_type = GLSL_TYPE_INT;
2813       break;
2814    case GLSL_TYPE_DOUBLE:
2815       base_type = GLSL_TYPE_FLOAT;
2816       break;
2817    default:
2818       unreachable("unknown 64-bit vertex attribute format!");
2819    }
2820    if (glsl_type_is_scalar(type))
2821       return glsl_vector_type(base_type, 2);
2822    unsigned num_components;
2823    if (glsl_type_is_matrix(type)) {
2824       /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
2825       unsigned vec_components = glsl_get_vector_elements(type);
2826       if (vec_components == 3)
2827          vec_components = 4;
2828       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2829    } else {
2830       num_components = glsl_get_vector_elements(type) * 2;
2831       if (num_components <= 4)
2832          return glsl_vector_type(base_type, num_components);
2833    }
2834    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
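   /* e.g. a dvec3 (6 32-bit components) becomes struct { vec4, vec2 },
    * a dvec4 becomes struct { vec4, vec4 }, and a dmat3 (3 columns padded
    * to dvec4 size) becomes a struct of 6 vec4s
    */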
2835    struct glsl_struct_field fields[8] = {0};
2836    unsigned remaining = num_components;
2837    unsigned nfields = 0;
2838    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2839       assert(i < ARRAY_SIZE(fields));
2840       fields[i].name = "";
2841       fields[i].offset = i * 16;
2842       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2843    }
2844    char buf[64];
2845    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2846    return glsl_struct_type(fields, nfields, buf, true);
2847 }
2848
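/* walk up the deref chain and return the first matrix type found, if any */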
2849 static const struct glsl_type *
2850 deref_is_matrix(nir_deref_instr *deref)
2851 {
2852    if (glsl_type_is_matrix(deref->type))
2853       return deref->type;
2854    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2855    if (parent)
2856       return deref_is_matrix(parent);
2857    return NULL;
2858 }
2859
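/* rewrite all derefs, loads, and stores of a 64bit variable to operate on the
 * 32bit replacement type: scalars/vectors are (un)packed with 64_2x32 ops, and
 * matrix rows are scattered/gathered across the rewritten struct members using
 * per-column if blocks, since the row index may be indirect
 */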
2860 static bool
2861 lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
2862                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
2863 {
2864    bool func_progress = false;
2865    if (!function->impl)
2866       return false;
2867    nir_builder b;
2868    nir_builder_init(&b, function->impl);
2869    nir_foreach_block(block, function->impl) {
2870       nir_foreach_instr_safe(instr, block) {
2871          switch (instr->type) {
2872          case nir_instr_type_deref: {
2873             nir_deref_instr *deref = nir_instr_as_deref(instr);
2874             if (!(deref->modes & var->data.mode))
2875                continue;
2876             if (nir_deref_instr_get_variable(deref) != var)
2877                continue;
2878
2879             /* matrix types are special: store the original deref type for later use */
2880             const struct glsl_type *matrix = deref_is_matrix(deref);
2881             nir_deref_instr *parent = nir_deref_instr_parent(deref);
2882             if (!matrix) {
2883                /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
2884                hash_table_foreach(derefs, he) {
2885                   /* propagate parent matrix type to row deref */
2886                   if (he->key == parent)
2887                      matrix = he->data;
2888                }
2889             }
2890             if (matrix)
2891                _mesa_hash_table_insert(derefs, deref, (void*)matrix);
2892             if (deref->deref_type == nir_deref_type_var)
2893                deref->type = var->type;
2894             else
2895                deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
2896          }
2897          break;
2898          case nir_instr_type_intrinsic: {
2899             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2900             if (intr->intrinsic != nir_intrinsic_store_deref &&
2901                   intr->intrinsic != nir_intrinsic_load_deref)
2902                break;
2903             if (nir_intrinsic_get_var(intr, 0) != var)
2904                break;
2905             if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2906                   (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
2907                break;
2908             b.cursor = nir_before_instr(instr);
2909             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2910             unsigned num_components = intr->num_components * 2;
2911             nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
2912             /* this is the stored matrix type from the deref */
2913             struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2914             const struct glsl_type *matrix = he ? he->data : NULL;
2915             if (doubles_only && !matrix)
2916                break;
2917             func_progress = true;
2918             if (intr->intrinsic == nir_intrinsic_store_deref) {
2919                /* first, unpack the src data to 32bit vec2 components */
2920                for (unsigned i = 0; i < intr->num_components; i++) {
2921                   nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2922                   comp[i * 2] = nir_channel(&b, ssa, 0);
2923                   comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2924                }
2925                unsigned wrmask = nir_intrinsic_write_mask(intr);
2926                unsigned mask = 0;
2927                /* expand writemask for doubled components */
2928                for (unsigned i = 0; i < intr->num_components; i++) {
2929                   if (wrmask & BITFIELD_BIT(i))
2930                      mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2931                }
2932                if (matrix) {
2933                   /* matrix types always come from array (row) derefs */
2934                   assert(deref->deref_type == nir_deref_type_array);
2935                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2936                   /* let optimization clean up consts later */
2937                   nir_ssa_def *index = deref->arr.index.ssa;
2938                   /* this might be an indirect array index:
2939                    * - iterate over matrix columns
2940                    * - add if blocks for each column
2941                    * - perform the store in the block
2942                    */
2943                   for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2944                      nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2945                      unsigned vec_components = glsl_get_vector_elements(matrix);
2946                      /* always clamp dvec3 to 4 components */
2947                      if (vec_components == 3)
2948                         vec_components = 4;
2949                      unsigned start_component = idx * vec_components * 2;
2950                      /* struct member */
2951                      unsigned member = start_component / 4;
2952                      /* number of components remaining */
2953                      unsigned remaining = num_components;
2954                      for (unsigned i = 0; i < num_components; member++) {
2955                         if (!(mask & BITFIELD_BIT(i)))
2956                            continue;
2957                         assert(member < glsl_get_length(var_deref->type));
2958                         /* deref the rewritten struct to the appropriate vec4/vec2 */
2959                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2960                         unsigned incr = MIN2(remaining, 4);
2961                         /* assemble the write component vec */
2962                         nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
2963                         /* use the number of components being written as the writemask */
2964                         if (glsl_get_vector_elements(strct->type) > val->num_components)
2965                            val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
2966                         nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
2967                         remaining -= incr;
2968                         i += incr;
2969                      }
2970                      nir_pop_if(&b, NULL);
2971                   }
2972                   _mesa_set_add(deletes, &deref->instr);
2973                } else if (num_components <= 4) {
2974                   /* simple store case: just write out the components */
2975                   nir_ssa_def *dest = nir_vec(&b, comp, num_components);
2976                   nir_store_deref(&b, deref, dest, mask);
2977                } else {
2978                   /* writing > 4 components: access the struct and write to the appropriate vec4 members */
2979                   for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
2980                      if (!(mask & BITFIELD_MASK(4)))
2981                         continue;
2982                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2983                      nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
2984                      if (glsl_get_vector_elements(strct->type) > dest->num_components)
2985                         dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
2986                      nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
2987                      mask >>= 4;
2988                   }
2989                }
2990             } else {
2991                nir_ssa_def *dest = NULL;
2992                if (matrix) {
2993                   /* matrix types always come from array (row) derefs */
2994                   assert(deref->deref_type == nir_deref_type_array);
2995                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2996                   /* let optimization clean up consts later */
2997                   nir_ssa_def *index = deref->arr.index.ssa;
2998                   /* this might be an indirect array index:
2999                    * - iterate over matrix columns
3000                    * - add if blocks for each column
3001                    * - phi the loads using the array index
3002                    */
3003                   unsigned cols = glsl_get_matrix_columns(matrix);
3004                   nir_ssa_def *dests[4];
3005                   for (unsigned idx = 0; idx < cols; idx++) {
3006                      /* don't add an if for the final row: this will be handled in the else */
3007                      if (idx < cols - 1)
3008                         nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3009                      unsigned vec_components = glsl_get_vector_elements(matrix);
3010                      /* always clamp dvec3 to 4 components */
3011                      if (vec_components == 3)
3012                         vec_components = 4;
3013                      unsigned start_component = idx * vec_components * 2;
3014                      /* struct member */
3015                      unsigned member = start_component / 4;
3016                      /* number of components remaining */
3017                      unsigned remaining = num_components;
3018                      /* component index */
3019                      unsigned comp_idx = 0;
3020                      for (unsigned i = 0; i < num_components; member++) {
3021                         assert(member < glsl_get_length(var_deref->type));
3022                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3023                         nir_ssa_def *load = nir_load_deref(&b, strct);
3024                         unsigned incr = MIN2(remaining, 4);
3025                         /* repack the loads to 64bit */
3026                         for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3027                            comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3028                         remaining -= incr;
3029                         i += incr;
3030                      }
3031                      dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3032                      if (idx < cols - 1)
3033                         nir_push_else(&b, NULL);
3034                   }
3035                   /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3036                   for (unsigned idx = cols - 1; idx >= 1; idx--) {
3037                      nir_pop_if(&b, NULL);
3038                      dest = nir_if_phi(&b, dests[idx - 1], dest);
3039                   }
3040                   _mesa_set_add(deletes, &deref->instr);
3041                } else if (num_components <= 4) {
3042                   /* simple load case */
3043                   nir_ssa_def *load = nir_load_deref(&b, deref);
3044                   /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3045                   for (unsigned i = 0; i < intr->num_components; i++) {
3046                      comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3047                   }
3048                   dest = nir_vec(&b, comp, intr->num_components);
3049                } else {
3050                   /* loading > 4 components: access the struct and load the appropriate vec4 members */
3051                   for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3052                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3053                      nir_ssa_def *load = nir_load_deref(&b, strct);
3054                      comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
3055                      if (num_components > 2)
3056                         comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3057                   }
3058                   dest = nir_vec(&b, comp, intr->num_components);
3059                }
3060                nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
3061             }
3062             _mesa_set_add(deletes, instr);
3063             break;
3064          }
3065          break;
3066          default: break;
3067          }
3068       }
3069    }
3070    if (func_progress)
3071       nir_metadata_preserve(function->impl, nir_metadata_none);
3072    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3073    set_foreach_remove(deletes, he)
3074       nir_instr_remove((void*)he->key);
3075    return func_progress;
3076 }
3077
3078 static bool
3079 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3080                       struct set *deletes, bool doubles_only)
3081 {
3082    if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3083       return false;
3084    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3085    /* once type is rewritten, rewrite all loads and stores */
3086    nir_foreach_function(function, shader)
3087       lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3088    return true;
3089 }
3090
3091 /* rewrite all 64bit input/output and function-temp variables to use 32bit types and load/stores */
3092 static bool
3093 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3094 {
3095    bool progress = false;
3096    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3097    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3098    nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
3099       progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
3100    nir_foreach_function(function, shader) {
3101       nir_foreach_function_temp_variable(var, function->impl) {
3102          if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3103             continue;
3104          var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3105          progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3106       }
3107    }
3108    ralloc_free(deletes);
3109    ralloc_free(derefs);
3110    if (progress) {
3111       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3112       nir_lower_phis_to_scalar(shader, false);
3113       optimize_nir(shader, NULL);
3114    }
3115    return progress;
3116 }
3117
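/* split struct outputs into one variable per member so that io assignment can
 * treat each member as an independent varying
 */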
3118 static bool
3119 split_blocks(nir_shader *nir)
3120 {
3121    bool progress = false;
3122    bool changed = false;
3123    do {
3124       progress = false;
3125       nir_foreach_shader_out_variable(var, nir) {
3126          const struct glsl_type *base_type = glsl_without_array(var->type);
3127          nir_variable *members[32]; //can't have more than this without breaking NIR
3128          if (!glsl_type_is_struct(base_type))
3129             continue;
3130          /* TODO: arrays? */
3131          if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
3132             continue;
3133          if (glsl_count_attribute_slots(var->type, false) == 1)
3134             continue;
3135          unsigned offset = 0;
3136          for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
3137             members[i] = nir_variable_clone(var, nir);
3138             members[i]->type = glsl_get_struct_field(var->type, i);
3139             members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
3140             members[i]->data.location += offset;
3141             offset += glsl_count_attribute_slots(members[i]->type, false);
3142             nir_shader_add_variable(nir, members[i]);
3143          }
3144          nir_foreach_function(function, nir) {
3145             bool func_progress = false;
3146             if (!function->impl)
3147                continue;
3148             nir_builder b;
3149             nir_builder_init(&b, function->impl);
3150             nir_foreach_block(block, function->impl) {
3151                nir_foreach_instr_safe(instr, block) {
3152                   switch (instr->type) {
3153                   case nir_instr_type_deref: {
3154                      nir_deref_instr *deref = nir_instr_as_deref(instr);
3155                      if (!(deref->modes & nir_var_shader_out))
3156                         continue;
3157                      if (nir_deref_instr_get_variable(deref) != var)
3158                         continue;
3159                      if (deref->deref_type != nir_deref_type_struct)
3160                         continue;
3161                      nir_deref_instr *parent = nir_deref_instr_parent(deref);
3162                      if (parent->deref_type != nir_deref_type_var)
3163                         continue;
3164                      deref->modes = nir_var_shader_temp;
3165                      parent->modes = nir_var_shader_temp;
3166                      b.cursor = nir_before_instr(instr);
3167                      nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
3168                      nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
3169                      nir_instr_remove(&deref->instr);
3170                      func_progress = true;
3171                      break;
3172                   }
3173                   default: break;
3174                   }
3175                }
3176             }
3177             if (func_progress)
3178                nir_metadata_preserve(function->impl, nir_metadata_none);
3179          }
3180          var->data.mode = nir_var_shader_temp;
3181          changed = true;
3182          progress = true;
3183       }
3184    } while (progress);
3185    return changed;
3186 }
3187
3188 static void
3189 zink_shader_dump(void *words, size_t size, const char *file)
3190 {
3191    FILE *fp = fopen(file, "wb");
3192    if (fp) {
3193       fwrite(words, 1, size, fp);
3194       fclose(fp);
3195       fprintf(stderr, "wrote '%s'...\n", file);
3196    }
3197 }
3198
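/* create either a VkShaderModule or, for separate shaders with
 * EXT_shader_object, a VkShaderEXT from the given spirv; with
 * ZINK_DEBUG_SPIRV the binary is dumped to disk, and with
 * ZINK_DEBUG_VALIDATION (debug builds) it is round-tripped through
 * spirv_to_nir as a sanity check
 */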
3199 static struct zink_shader_object
3200 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool separate)
3201 {
3202    VkShaderModuleCreateInfo smci = {0};
3203    VkShaderCreateInfoEXT sci = {0};
3204
3205    if (!spirv)
3206       spirv = zs->spirv;
3207
3208    if (zink_debug & ZINK_DEBUG_SPIRV) {
3209       char buf[256];
3210       static int i;
3211       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3212       zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3213    }
3214
3215    sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3216    sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3217    if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
3218       sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT;
3219    sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3220    sci.codeSize = spirv->num_words * sizeof(uint32_t);
3221    sci.pCode = spirv->words;
3222    sci.pName = "main";
3223    sci.setLayoutCount = 2;
3224    VkDescriptorSetLayout dsl[2] = {0};
3225    dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl;
3226    sci.pSetLayouts = dsl;
3227    VkPushConstantRange pcr;
3228    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3229    pcr.offset = 0;
3230    pcr.size = sizeof(struct zink_gfx_push_constant);
3231    sci.pushConstantRangeCount = 1;
3232    sci.pPushConstantRanges = &pcr;
3233
3234    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3235    smci.codeSize = spirv->num_words * sizeof(uint32_t);
3236    smci.pCode = spirv->words;
3237
3238 #ifndef NDEBUG
3239    if (zink_debug & ZINK_DEBUG_VALIDATION) {
3240       static const struct spirv_to_nir_options spirv_options = {
3241          .environment = NIR_SPIRV_VULKAN,
3242          .caps = {
3243             .float64 = true,
3244             .int16 = true,
3245             .int64 = true,
3246             .tessellation = true,
3247             .float_controls = true,
3248             .image_ms_array = true,
3249             .image_read_without_format = true,
3250             .image_write_without_format = true,
3251             .storage_image_ms = true,
3252             .geometry_streams = true,
3253             .storage_8bit = true,
3254             .storage_16bit = true,
3255             .variable_pointers = true,
3256             .stencil_export = true,
3257             .post_depth_coverage = true,
3258             .transform_feedback = true,
3259             .device_group = true,
3260             .draw_parameters = true,
3261             .shader_viewport_index_layer = true,
3262             .multiview = true,
3263             .physical_storage_buffer_address = true,
3264             .int64_atomics = true,
3265             .subgroup_arithmetic = true,
3266             .subgroup_basic = true,
3267             .subgroup_ballot = true,
3268             .subgroup_quad = true,
3269             .subgroup_shuffle = true,
3270             .subgroup_vote = true,
3271             .vk_memory_model = true,
3272             .vk_memory_model_device_scope = true,
3273             .int8 = true,
3274             .float16 = true,
3275             .demote_to_helper_invocation = true,
3276             .sparse_residency = true,
3277             .min_lod = true,
3278          },
3279          .ubo_addr_format = nir_address_format_32bit_index_offset,
3280          .ssbo_addr_format = nir_address_format_32bit_index_offset,
3281          .phys_ssbo_addr_format = nir_address_format_64bit_global,
3282          .push_const_addr_format = nir_address_format_logical,
3283          .shared_addr_format = nir_address_format_32bit_offset,
3284       };
3285       uint32_t num_spec_entries = 0;
3286       struct nir_spirv_specialization *spec_entries = NULL;
3287       VkSpecializationInfo sinfo = {0};
3288       VkSpecializationMapEntry me[3];
3289       uint32_t size[3] = {1,1,1};
3290       if (!zs->info.workgroup_size[0]) {
3291          sinfo.mapEntryCount = 3;
3292          sinfo.pMapEntries = &me[0];
3293          sinfo.dataSize = sizeof(uint32_t) * 3;
3294          sinfo.pData = size;
3295          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3296          for (int i = 0; i < 3; i++) {
3297             me[i].size = sizeof(uint32_t);
3298             me[i].constantID = ids[i];
3299             me[i].offset = i * sizeof(uint32_t);
3300          }
3301          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3302       }
3303       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3304                          spec_entries, num_spec_entries,
3305                          clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3306       assert(nir);
3307       ralloc_free(nir);
3308       free(spec_entries);
3309    }
3310 #endif
3311
3312    VkResult ret;
3313    struct zink_shader_object obj;
3314    if (!separate || !screen->info.have_EXT_shader_object)
3315       ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3316    else
3317       ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3318    bool success = zink_screen_handle_vkresult(screen, ret);
3319    assert(success);
3320    return obj;
3321 }
3322
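/* delete any shader in/out variables that are never dereferenced */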
3323 static void
3324 prune_io(nir_shader *nir)
3325 {
3326    nir_foreach_shader_in_variable_safe(var, nir) {
3327       if (!find_var_deref(nir, var))
3328          var->data.mode = nir_var_shader_temp;
3329    }
3330    nir_foreach_shader_out_variable_safe(var, nir) {
3331       if (!find_var_deref(nir, var))
3332          var->data.mode = nir_var_shader_temp;
3333    }
3334    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3335 }
3336
3337 static void
3338 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3339 {
3340    /* unconvert from zink_binding() */
3341    uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3342    assert(sampler_id < 32); //bitfield size for tracking
3343    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
3344 }
3345
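/* fix up tex instruction results that don't match the sampler's return type:
 * the dest is retyped to the sampler's bit size and converted back at the use
 * sites; legacy (vec4) shadow sampling is instead flagged for the fs swizzle
 * workaround when 'data' (the zink_shader) is provided
 */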
3346 static nir_ssa_def *
3347 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
3348 {
3349    assert(var);
3350    const struct glsl_type *type = glsl_without_array(var->type);
3351    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3352    bool is_int = glsl_base_type_is_integer(ret_type);
3353    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3354    unsigned dest_size = nir_dest_bit_size(tex->dest);
3355    b->cursor = nir_after_instr(&tex->instr);
3356    unsigned num_components = nir_dest_num_components(tex->dest);
3357    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3358    if (bit_size == dest_size && !rewrite_depth)
3359       return NULL;
3360    nir_ssa_def *dest = &tex->dest.ssa;
3361    if (rewrite_depth && data) {
3362       if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3363          flag_shadow_tex(var, data);
3364       else
3365          mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3366       return NULL;
3367    }
3368    if (bit_size != dest_size) {
3369       tex->dest.ssa.bit_size = bit_size;
3370       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3371
3372       if (is_int) {
3373          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3374             dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
3375          else
3376             dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
3377       } else {
3378          dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
3379       }
3380       if (rewrite_depth)
3381          return dest;
3382       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
3383    } else if (rewrite_depth) {
3384       return dest;
3385    }
3386    return dest;
3387 }
3388
3389 struct lower_zs_swizzle_state {
3390    bool shadow_only;
3391    unsigned base_sampler_id;
3392    const struct zink_zs_swizzle_key *swizzle;
3393 };
3394
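/* emulate depth-texture swizzling in the shader: texture results are manually
 * swizzled per the zink_zs_swizzle_key, and legacy shadow samplers get their
 * scalar result splatted back out to a vector
 */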
3395 static bool
3396 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3397 {
3398    struct lower_zs_swizzle_state *state = data;
3399    const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3400    assert(state->shadow_only || swizzle_key);
3401    if (instr->type != nir_instr_type_tex)
3402       return false;
3403    nir_tex_instr *tex = nir_instr_as_tex(instr);
3404    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3405        (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3406       return false;
3407    if (tex->is_shadow && tex->op == nir_texop_tg4)
3408       /* Will not even try to emulate the shadow comparison */
3409       return false;
3410    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3411    nir_variable *var = NULL;
3412    if (handle != -1)
3413       /* gtfo bindless depth texture mode */
3414       return false;
3415    nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3416       if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3417          unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3418          if (tex->texture_index >= img->data.driver_location &&
3419                tex->texture_index < img->data.driver_location + size) {
3420             var = img;
3421             break;
3422          }
3423       }
3424    }
3425    assert(var);
3426    uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3427    const struct glsl_type *type = glsl_without_array(var->type);
3428    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3429    bool is_int = glsl_base_type_is_integer(ret_type);
3430    unsigned num_components = nir_dest_num_components(tex->dest);
3431    if (tex->is_shadow)
3432       tex->is_new_style_shadow = true;
3433    nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3434    assert(dest || !state->shadow_only);
3435    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3436       return false;
3437    else if (!dest)
3438       dest = &tex->dest.ssa;
3439    else
3440       tex->dest.ssa.num_components = 1;
3441    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3442       /* these require manual swizzles */
3443       if (tex->op == nir_texop_tg4) {
3444          assert(!tex->is_shadow);
3445          nir_ssa_def *swizzle;
3446          switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3447          case PIPE_SWIZZLE_0:
3448             swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
3449             break;
3450          case PIPE_SWIZZLE_1:
3451             /* tg4 returns a vec4, so splat a constant 1 across all four components */
3452             swizzle = is_int ? nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest))
3453                              : nir_imm_floatN_t(b, 1.0, nir_dest_bit_size(tex->dest));
3454             swizzle = nir_vec4(b, swizzle, swizzle, swizzle, swizzle);
3455             break;
3456          default:
3457             if (!tex->component)
3458                return false;
3459             tex->component = 0;
3460             return true;
3461          }
3462          nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3463          return true;
3464       }
3465       nir_ssa_def *vec[4];
3466       for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3467          switch (swizzle_key->swizzle[sampler_id].s[i]) {
3468          case PIPE_SWIZZLE_0:
3469             vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
3470             break;
3471          case PIPE_SWIZZLE_1:
3472             if (is_int)
3473                vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
3474             else
3475                vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
3476             break;
3477          default:
3478             vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3479             break;
3480          }
3481       }
3482       nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
3483       nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3484    } else {
3485       assert(tex->is_shadow);
3486       nir_ssa_def *vec[4] = {dest, dest, dest, dest};
3487       nir_ssa_def *splat = nir_vec(b, vec, num_components);
3488       nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3489    }
3490    return true;
3491 }
3492
3493 static bool
3494 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3495 {
3496    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3497    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3498    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
3499 }
3500
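/* flip gl_PointCoord.y for point sprites: y' = 1.0 - y */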
3501 static bool
3502 invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
3503 {
3504    if (instr->type != nir_instr_type_intrinsic)
3505       return false;
3506    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3507    if (intr->intrinsic != nir_intrinsic_load_deref)
3508       return false;
3509    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
3510    if (deref_var->data.location != VARYING_SLOT_PNTC)
3511       return false;
3512    b->cursor = nir_after_instr(instr);
3513    nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
3514                                   nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
3515    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3516    return true;
3517 }
3518
3519 static bool
3520 invert_point_coord(nir_shader *nir)
3521 {
3522    if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
3523       return false;
3524    return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
3525 }
3526
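/* final shared compile step: prune unused io, convert out of ssa, translate
 * the nir to spirv, and create the vulkan shader object/module
 */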
3527 static struct zink_shader_object
3528 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool separate)
3529 {
3530    struct zink_shader_info *sinfo = &zs->sinfo;
3531    prune_io(nir);
3532
3533    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3534
3535    struct zink_shader_object obj;
3536    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3537    if (spirv)
3538       obj = zink_shader_spirv_compile(screen, zs, spirv, separate);
3539
3540    /* TODO: determine if there's any reason to cache spirv output? */
3541    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3542       zs->spirv = spirv;
3543    else
3544       ralloc_free(spirv);
3545    return obj;
3546 }
3547
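/* apply all shader-key-dependent lowering (vertex attribute decomposition,
 * line/point emulation, depth clip, sample handling, zs swizzles, nonseamless
 * cubemaps, uniform inlining, etc) to the given nir, then compile it to a
 * VkShaderModule; the nir is consumed (freed) before returning
 */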
3548 VkShaderModule
3549 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
3550                     nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
3551 {
3552    struct zink_shader_info *sinfo = &zs->sinfo;
3553    bool need_optimize = false;
3554    bool inlined_uniforms = false;
3555
3556    if (key) {
3557       if (key->inline_uniforms) {
3558          NIR_PASS_V(nir, nir_inline_uniforms,
3559                     nir->info.num_inlinable_uniforms,
3560                     key->base.inlined_uniform_values,
3561                     nir->info.inlinable_uniform_dw_offsets);
3562
3563          inlined_uniforms = true;
3564       }
3565
3566       /* TODO: use a separate mem ctx here for ralloc */
3567
3568       if (!screen->optimal_keys) {
3569          switch (zs->info.stage) {
3570          case MESA_SHADER_VERTEX: {
3571             uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3572             const struct zink_vs_key *vs_key = zink_vs_key(key);
3573             switch (vs_key->size) {
3574             case 4:
3575                decomposed_attrs = vs_key->u32.decomposed_attrs;
3576                decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3577                break;
3578             case 2:
3579                decomposed_attrs = vs_key->u16.decomposed_attrs;
3580                decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3581                break;
3582             case 1:
3583                decomposed_attrs = vs_key->u8.decomposed_attrs;
3584                decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3585                break;
3586             default: break;
3587             }
3588             if (decomposed_attrs || decomposed_attrs_without_w)
3589                NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3590             break;
3591          }
3592
3593          case MESA_SHADER_GEOMETRY:
3594             if (zink_gs_key(key)->lower_line_stipple) {
3595                NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3596                NIR_PASS_V(nir, nir_lower_var_copies);
3597                need_optimize = true;
3598             }
3599
3600             if (zink_gs_key(key)->lower_line_smooth) {
3601                NIR_PASS_V(nir, lower_line_smooth_gs);
3602                NIR_PASS_V(nir, nir_lower_var_copies);
3603                need_optimize = true;
3604             }
3605
3606             if (zink_gs_key(key)->lower_gl_point) {
3607                NIR_PASS_V(nir, lower_gl_point_gs);
3608                need_optimize = true;
3609             }
3610
3611             if (zink_gs_key(key)->lower_pv_mode) {
3612                NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3613                need_optimize = true; //TODO verify that this is required
3614             }
3615             break;
3616
3617          default:
3618             break;
3619          }
3620       }
3621
3622       switch (zs->info.stage) {
3623       case MESA_SHADER_VERTEX:
3624       case MESA_SHADER_TESS_EVAL:
3625       case MESA_SHADER_GEOMETRY:
3626          if (zink_vs_key_base(key)->last_vertex_stage) {
3627             if (zs->sinfo.have_xfb)
3628                sinfo->last_vertex = true;
3629
3630             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3631                NIR_PASS_V(nir, nir_lower_clip_halfz);
3632             }
3633             if (zink_vs_key_base(key)->push_drawid) {
3634                NIR_PASS_V(nir, lower_drawid);
3635             }
3636          }
3637          if (zink_vs_key_base(key)->robust_access)
3638             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3639          break;
3640       case MESA_SHADER_FRAGMENT:
3641          if (zink_fs_key(key)->lower_line_smooth) {
3642             NIR_PASS_V(nir, lower_line_smooth_fs,
3643                        zink_fs_key(key)->lower_line_stipple);
3644             need_optimize = true;
3645          } else if (zink_fs_key(key)->lower_line_stipple)
3646                NIR_PASS_V(nir, lower_line_stipple_fs);
3647
3648          if (zink_fs_key(key)->lower_point_smooth) {
3649             NIR_PASS_V(nir, nir_lower_point_smooth);
3650             NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3651             nir->info.fs.uses_discard = true;
3652             need_optimize = true;
3653          }
3654
3655          if (zink_fs_key(key)->robust_access)
3656             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3657
3658          if (!zink_fs_key_base(key)->samples &&
3659             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3660             /* VK will always use gl_SampleMask[] values even if sample count is 0,
3661              * so we need to skip this write here to mimic GL's behavior of ignoring it
3662              */
3663             nir_foreach_shader_out_variable(var, nir) {
3664                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3665                   var->data.mode = nir_var_shader_temp;
3666             }
3667             nir_fixup_deref_modes(nir);
3668             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3669             need_optimize = true;
3670          }
3671          if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3672             NIR_PASS_V(nir, lower_dual_blend);
3673          }
3674          if (zink_fs_key_base(key)->single_sample) {
3675             NIR_PASS_V(nir, nir_lower_single_sampled);
3676          }
3677          if (zink_fs_key_base(key)->coord_replace_bits)
3678             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
3679          if (zink_fs_key_base(key)->point_coord_yinvert)
3680             NIR_PASS_V(nir, invert_point_coord);
3681          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3682             nir_foreach_shader_in_variable(var, nir)
3683                var->data.sample = true;
3684             nir->info.fs.uses_sample_qualifier = true;
3685             nir->info.fs.uses_sample_shading = true;
3686          }
3687          if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3688             NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3689          if (nir->info.fs.uses_fbfetch_output) {
3690             nir_variable *fbfetch = NULL;
3691             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3692             /* old variable must be deleted to avoid spirv errors */
3693             fbfetch->data.mode = nir_var_shader_temp;
3694             nir_fixup_deref_modes(nir);
3695             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3696             need_optimize = true;
3697          }
3698          nir_foreach_shader_in_variable_safe(var, nir) {
3699             if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3700                continue;
3701             nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3702             var->data.mode = nir_var_shader_temp;
3703             nir_fixup_deref_modes(nir);
3704             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3705             need_optimize = true;
3706          }
3707          break;
3708       case MESA_SHADER_COMPUTE:
3709          if (zink_cs_key(key)->robust_access)
3710             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3711          break;
3712       default: break;
3713       }
3714       if (key->base.needs_zs_shader_swizzle) {
3715          assert(extra_data);
3716          NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
3717       }
3718       if (key->base.nonseamless_cube_mask) {
3719          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
3720          need_optimize = true;
3721       }
3722    }
3723    if (screen->driconf.inline_uniforms) {
3724       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3725       NIR_PASS_V(nir, rewrite_bo_access, screen);
3726       NIR_PASS_V(nir, remove_bo_access, zs);
3727       need_optimize = true;
3728    }
3729    if (inlined_uniforms) {
3730       optimize_nir(nir, zs);
3731
3732       /* This must be done again. */
3733       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3734                                                        nir_var_shader_out);
3735
3736       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3737       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
3738          zs->can_inline = false;
3739    } else if (need_optimize)
3740       optimize_nir(nir, zs);
3741    
3742    struct zink_shader_object obj = compile_module(screen, zs, nir, false);
3743    ralloc_free(nir);
3744    return obj.mod;
3745 }
3746
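/* compile a shader for separate-shader (shader object) use: descriptor
 * bindings are remapped to the precompile descriptor layout before compilation
 */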
3747 struct zink_shader_object
3748 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
3749 {
3750    nir_shader *nir = zink_shader_deserialize(screen, zs);
3751    int set = nir->info.stage == MESA_SHADER_FRAGMENT;
3752    unsigned offsets[4];
3753    zink_descriptor_shader_get_binding_offsets(zs, offsets);
3754    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
3755       if (var->data.bindless)
3756          continue;
3757       var->data.descriptor_set = set;
3758       switch (var->data.mode) {
3759       case nir_var_mem_ubo:
3760          var->data.binding = !!var->data.driver_location;
3761          break;
3762       case nir_var_uniform:
3763          if (glsl_type_is_sampler(glsl_without_array(var->type)))
3764             var->data.binding += offsets[1];
3765          break;
3766       case nir_var_mem_ssbo:
3767          var->data.binding += offsets[2];
3768          break;
3769       case nir_var_image:
3770          var->data.binding += offsets[3];
3771          break;
3772       default: break;
3773       }
3774    }
3775    if (screen->driconf.inline_uniforms) {
3776       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3777       NIR_PASS_V(nir, rewrite_bo_access, screen);
3778       NIR_PASS_V(nir, remove_bo_access, zs);
3779    }
3780    optimize_nir(nir, zs);
3781    zink_descriptor_shader_init(screen, zs);
3782    struct zink_shader_object obj = compile_module(screen, zs, nir, true);
3783    ralloc_free(nir);
3784    return obj;
3785 }
3786
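/* GL's gl_InstanceID does not include baseinstance while Vulkan's
 * InstanceIndex does, so subtract load_base_instance from every
 * load_instance_id in vertex shaders
 */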
3787 static bool
3788 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
3789 {
3790    if (instr->type != nir_instr_type_intrinsic)
3791       return false;
3792    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3793    if (intr->intrinsic != nir_intrinsic_load_instance_id)
3794       return false;
3795    b->cursor = nir_after_instr(instr);
3796    nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
3797    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3798    return true;
3799 }
3800
3801 static bool
3802 lower_baseinstance(nir_shader *shader)
3803 {
3804    if (shader->info.stage != MESA_SHADER_VERTEX)
3805       return false;
3806    return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
3807 }
3808
3809 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
3810  * so instead we delete all those broken variables and just make new ones
3811  */
3812 static bool
3813 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
3814 {
3815    uint64_t max_ssbo_size = 0;
3816    uint64_t max_ubo_size = 0;
3817    uint64_t max_uniform_size = 0;
3818
3819    if (!shader->info.num_ssbos && !shader->info.num_ubos)
3820       return false;
3821
3822    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
3823       const struct glsl_type *type = glsl_without_array(var->type);
3824       if (type_is_counter(type))
3825          continue;
3826       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
3827       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
3828       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
3829       if (interface_type) {
3830          unsigned block_size = glsl_get_explicit_size(interface_type, true);
3831          if (glsl_get_length(interface_type) == 1) {
3832             /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
3833             const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
3834             if (glsl_type_is_array(f) && !glsl_array_size(f))
3835                block_size = 0;
3836          }
3837          if (block_size) {
3838             block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
3839             size = MAX2(size, block_size);
3840          }
3841       }
3842       if (var->data.mode == nir_var_mem_ubo) {
3843          if (var->data.driver_location)
3844             max_ubo_size = MAX2(max_ubo_size, size);
3845          else
3846             max_uniform_size = MAX2(max_uniform_size, size);
3847       } else {
3848          max_ssbo_size = MAX2(max_ssbo_size, size);
3849          if (interface_type) {
3850             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
3851                needs_size = true;
3852          }
3853       }
3854       var->data.mode = nir_var_shader_temp;
3855    }
3856    nir_fixup_deref_modes(shader);
3857    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3858    optimize_nir(shader, NULL);
3859
3860    struct glsl_struct_field field = {0};
3861    field.name = ralloc_strdup(shader, "base");
3862    if (shader->info.num_ubos) {
3863       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
3864          field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
3865          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3866                                                  glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
3867                                                  "uniform_0@32");
3868          var->interface_type = var->type;
3869          var->data.mode = nir_var_mem_ubo;
3870          var->data.driver_location = 0;
3871       }
3872
3873       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
3874       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
3875       if (num_ubos && ubos_used) {
3876          field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
3877          /* shrink array as much as possible */
              /* ffs is 1-based, and bit 0 (the default ubo) was already masked off, hence -2 */
3878          unsigned first_ubo = ffs(ubos_used) - 2;
3879          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
3880          num_ubos -= first_ubo;
3881          assert(num_ubos);
3882          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3883                                    glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
3884                                    "ubos@32");
3885          var->interface_type = var->type;
3886          var->data.mode = nir_var_mem_ubo;
3887          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
3888       }
3889    }
3890    if (shader->info.num_ssbos && zs->ssbos_used) {
3891       /* shrink array as much as possible */
3892       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
3893       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
3894       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
3895       assert(num_ssbos);
3896       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
3897       field.type = ssbo_type;
3898       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
3899                                               glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
3900                                               "ssbos@32");
3901       var->interface_type = var->type;
3902       var->data.mode = nir_var_mem_ssbo;
3903       var->data.driver_location = first_ssbo;
3904    }
3905    return true;
3906 }
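
/* illustrative sketch (GLSL-ish) of the replacement variables built above:
 *
 *    struct { uint base[max_uniform_size * 4]; } uniform_0[1];   // uniform_0@32
 *    struct { uint base[max_ubo_size * 4]; } ubos[num_ubos];     // ubos@32
 *    struct { uint base[N]; } ssbos[num_ssbos];                  // ssbos@32
 *
 * where N is max_ssbo_size * 4, or 0 (unsized) whenever get_ssbo_size
 * requires a real runtime size
 */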
3907
3908 static uint32_t
3909 get_src_mask_ssbo(unsigned total, nir_src src)
3910 {
3911    if (nir_src_is_const(src))
3912       return BITFIELD_BIT(nir_src_as_uint(src));
3913    return BITFIELD_MASK(total);
3914 }
3915
3916 static uint32_t
3917 get_src_mask_ubo(unsigned total, nir_src src)
3918 {
3919    if (nir_src_is_const(src))
3920       return BITFIELD_BIT(nir_src_as_uint(src));
3921    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
3922 }
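
/* e.g. with 4 buffers, a constant index of 2 yields the exact mask 0b0100,
 * while a dynamic index conservatively marks every addressable slot: 0b1111
 * for ssbos, 0b1110 for ubos, since ubo 0 is the default uniform block and
 * is never part of an indexable array
 */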
3923
3924 static bool
3925 analyze_io(struct zink_shader *zs, nir_shader *shader)
3926 {
3927    bool ret = false;
3928    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
3929    nir_foreach_block(block, impl) {
3930       nir_foreach_instr(instr, block) {
3931          if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
3932             /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
3933             nir_tex_instr *tex = nir_instr_as_tex(instr);
3934             nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
3935                if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3936                   unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3937                   if (tex->texture_index >= img->data.driver_location &&
3938                      tex->texture_index < img->data.driver_location + size) {
3939                      BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
3940                      break;
3941                   }
3942                }
3943             }
3944             continue;
3945          }
3946          if (instr->type != nir_instr_type_intrinsic)
3947             continue;
3948
3949          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3950          switch (intrin->intrinsic) {
3951          case nir_intrinsic_store_ssbo:
3952             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
3953             break;
3954
3955          case nir_intrinsic_get_ssbo_size: {
3956             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3957             ret = true;
3958             break;
3959          }
3960          case nir_intrinsic_ssbo_atomic_fadd:
3961          case nir_intrinsic_ssbo_atomic_add:
3962          case nir_intrinsic_ssbo_atomic_imin:
3963          case nir_intrinsic_ssbo_atomic_umin:
3964          case nir_intrinsic_ssbo_atomic_imax:
3965          case nir_intrinsic_ssbo_atomic_umax:
3966          case nir_intrinsic_ssbo_atomic_and:
3967          case nir_intrinsic_ssbo_atomic_or:
3968          case nir_intrinsic_ssbo_atomic_xor:
3969          case nir_intrinsic_ssbo_atomic_exchange:
3970          case nir_intrinsic_ssbo_atomic_comp_swap:
3971          case nir_intrinsic_ssbo_atomic_fmin:
3972          case nir_intrinsic_ssbo_atomic_fmax:
3973          case nir_intrinsic_ssbo_atomic_fcomp_swap:
3974          case nir_intrinsic_load_ssbo:
3975             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3976             break;
3977          case nir_intrinsic_load_ubo:
3978          case nir_intrinsic_load_ubo_vec4:
3979             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
3980             break;
3981          default:
3982             break;
3983          }
3984       }
3985    }
3986    return ret;
3987 }
3988
3989 struct zink_bindless_info {
3990    nir_variable *bindless[4];
3991    unsigned bindless_set;
3992 };
3993
3994 /* this is a "default" bindless texture used if the shader has no bindless texture variable of its own */
3995 static nir_variable *
3996 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
3997 {
3998    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
3999    nir_variable *var;
4000
4001    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4002    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4003    var->data.descriptor_set = descriptor_set;
4004    var->data.driver_location = var->data.binding = binding;
4005    return var;
4006 }
4007
4008 /* this is a "default" bindless image used if the shader has no bindless image variable of its own */
4009 static nir_variable *
4010 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4011 {
4012    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4013    nir_variable *var;
4014
4015    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4016    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4017    var->data.descriptor_set = descriptor_set;
4018    var->data.driver_location = var->data.binding = binding;
4019    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4020    return var;
4021 }
4022
4023 /* rewrite bindless instructions as array deref instructions */
4024 static bool
4025 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4026 {
4027    struct zink_bindless_info *bindless = data;
4028
4029    if (in->type == nir_instr_type_tex) {
4030       nir_tex_instr *tex = nir_instr_as_tex(in);
4031       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4032       if (idx == -1)
4033          return false;
4034
4035       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4036       if (!var)
4037          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4038       b->cursor = nir_before_instr(in);
4039       nir_deref_instr *deref = nir_build_deref_var(b, var);
4040       if (glsl_type_is_array(var->type))
4041          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4042       nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
4043
4044       /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4045        * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4046        * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4047        * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4048        *
4049        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4050        * - Warhammer 40k: Dawn of War III
4051        */
4052       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4053       int c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4054       /* query ops (e.g. txs) have no coord src to pad */
4055       if (c >= 0 && nir_src_num_components(tex->src[c].src) < needed_components) {
4056          nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4057          nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
4058          tex->coord_components = needed_components;
4059       }
4060       return true;
4061    }
4062    if (in->type != nir_instr_type_intrinsic)
4063       return false;
4064    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4065
4066    nir_intrinsic_op op;
4067 #define OP_SWAP(OP) \
4068    case nir_intrinsic_bindless_image_##OP: \
4069       op = nir_intrinsic_image_deref_##OP; \
4070       break;
4071
4072
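   /* e.g. OP_SWAP(load) expands to:
    *
    *    case nir_intrinsic_bindless_image_load:
    *       op = nir_intrinsic_image_deref_load;
    *       break;
    */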
4073    /* convert bindless intrinsics to deref intrinsics */
4074    switch (instr->intrinsic) {
4075    OP_SWAP(atomic_add)
4076    OP_SWAP(atomic_and)
4077    OP_SWAP(atomic_comp_swap)
4078    OP_SWAP(atomic_dec_wrap)
4079    OP_SWAP(atomic_exchange)
4080    OP_SWAP(atomic_fadd)
4081    OP_SWAP(atomic_fmax)
4082    OP_SWAP(atomic_fmin)
4083    OP_SWAP(atomic_imax)
4084    OP_SWAP(atomic_imin)
4085    OP_SWAP(atomic_inc_wrap)
4086    OP_SWAP(atomic_or)
4087    OP_SWAP(atomic_umax)
4088    OP_SWAP(atomic_umin)
4089    OP_SWAP(atomic_xor)
4090    OP_SWAP(format)
4091    OP_SWAP(load)
4092    OP_SWAP(order)
4093    OP_SWAP(samples)
4094    OP_SWAP(size)
4095    OP_SWAP(store)
4096    default:
4097       return false;
4098    }
4099
4100    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4101    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4102    if (!var)
4103       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4104    instr->intrinsic = op;
4105    b->cursor = nir_before_instr(in);
4106    nir_deref_instr *deref = nir_build_deref_var(b, var);
4107    if (glsl_type_is_array(var->type))
4108       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4109    nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
4110    return true;
4111 }
4112
4113 static bool
4114 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4115 {
4116    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4117       return false;
4118    nir_fixup_deref_modes(shader);
4119    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4120    optimize_nir(shader, NULL);
4121    return true;
4122 }
4123
4124 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4125 static bool
4126 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
4127 {
4128    if (in->type != nir_instr_type_intrinsic)
4129       return false;
4130    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4131    if (instr->intrinsic != nir_intrinsic_load_deref &&
4132        instr->intrinsic != nir_intrinsic_store_deref)
4133       return false;
4134
4135    nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
4136    nir_variable *var = nir_deref_instr_get_variable(src_deref);
4137    if (var->data.bindless)
4138       return false;
4139    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4140       return false;
4141    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4142       return false;
4143
4144    var->type = glsl_int64_t_type();
4145    var->data.bindless = 1;
4146    b->cursor = nir_before_instr(in);
4147    nir_deref_instr *deref = nir_build_deref_var(b, var);
4148    if (instr->intrinsic == nir_intrinsic_load_deref) {
4149        nir_ssa_def *def = nir_load_deref(b, deref);
4150        nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
4151        nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
4152    } else {
4153       nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
4154    }
4155    nir_instr_remove(in);
4156    nir_instr_remove(&src_deref->instr);
4157    return true;
4158 }
4159
4160 static bool
4161 lower_bindless_io(nir_shader *shader)
4162 {
4163    return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
4164 }
4165
4166 static uint32_t
4167 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4168 {
4169    if (stage == MESA_SHADER_NONE) {
4170       unreachable("not supported");
4171    } else {
4172       unsigned base = stage;
4173       /* clamp compute bindings for better driver efficiency */
4174       if (gl_shader_stage_is_compute(stage))
4175          base = 0;
4176       switch (type) {
4177       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4178       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4179          return base * 2 + !!index;
4180
4181       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4182          assert(stage == MESA_SHADER_KERNEL);
4183          FALLTHROUGH;
4184       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4185          if (stage == MESA_SHADER_KERNEL) {
4186             assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4187             return index + PIPE_MAX_SAMPLERS;
4188          }
4189          FALLTHROUGH;
4190       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4191          assert(index < PIPE_MAX_SAMPLERS);
4192          assert(stage != MESA_SHADER_KERNEL);
4193          return (base * PIPE_MAX_SAMPLERS) + index;
4194
4195       case VK_DESCRIPTOR_TYPE_SAMPLER:
4196          assert(index < PIPE_MAX_SAMPLERS);
4197          assert(stage == MESA_SHADER_KERNEL);
4198          return index;
4199
4200       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4201          return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4202
4203       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4204       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4205          assert(index < ZINK_MAX_SHADER_IMAGES);
4206          if (stage == MESA_SHADER_KERNEL)
4207             return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4208          return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4209
4210       default:
4211          unreachable("unexpected type");
4212       }
4213    }
4214 }
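
/* worked example: for a fragment shader (gfx stage index 4) the default
 * uniform block maps to binding 4 * 2 + 0 == 8 and any other UBO to
 * 4 * 2 + 1 == 9, so each gfx stage gets its own disjoint pair of UBO
 * bindings
 */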
4215
4216 static void
4217 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4218 {
4219    if (glsl_type_is_struct(type)) {
4220       for (unsigned i = 0; i < glsl_get_length(type); i++)
4221          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4222       return;
4223    }
4224
4225    /* just a random scalar in a struct */
4226    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4227       return;
4228
4229    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4230    unsigned binding;
4231    switch (vktype) {
4232       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4233          binding = 0;
4234          break;
4235       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4236          binding = 1;
4237          break;
4238       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4239          binding = 2;
4240          break;
4241       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4242          binding = 3;
4243          break;
4244       default:
4245          unreachable("unknown");
4246    }
4247    if (!bindless->bindless[binding]) {
4248       bindless->bindless[binding] = nir_variable_clone(var, nir);
4249       bindless->bindless[binding]->data.bindless = 0;
4250       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4251       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4252       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4253       if (!bindless->bindless[binding]->data.image.format)
4254          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4255       nir_shader_add_variable(nir, bindless->bindless[binding]);
4256    } else {
4257       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4258    }
4259    var->data.mode = nir_var_shader_temp;
4260 }
4261
4262 static bool
4263 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4264 {
4265    struct zink_screen *screen = data;
4266    if (instr->type != nir_instr_type_tex)
4267       return false;
4268    nir_tex_instr *tex = nir_instr_as_tex(instr);
4269    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4270       return false;
4271    if (tex->is_sparse && screen->need_2D_sparse) {
4272       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4273       mesa_loge("unhandled/unsupported 1D sparse texture!");
4274       abort();
4275    }
4276    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4277    b->cursor = nir_before_instr(instr);
4278    tex->coord_components++;
4279    unsigned srcs[] = {
4280       nir_tex_src_coord,
4281       nir_tex_src_offset,
4282       nir_tex_src_ddx,
4283       nir_tex_src_ddy,
4284    };
4285    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4286       int c = nir_tex_instr_src_index(tex, srcs[i]);
4287       if (c == -1)
4288          continue;
4289       if (tex->src[c].src.ssa->num_components == tex->coord_components)
4290          continue;
4291       nir_ssa_def *def;
4292       nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4293       if (tex->src[c].src.ssa->num_components == 1)
4294          def = nir_vec2(b, tex->src[c].src.ssa, zero);
4295       else
4296          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4297       nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
4298    }
4299    b->cursor = nir_after_instr(instr);
4300    unsigned needed_components = nir_tex_instr_dest_size(tex);
4301    unsigned num_components = tex->dest.ssa.num_components;
4302    if (needed_components > num_components) {
4303       tex->dest.ssa.num_components = needed_components;
4304       assert(num_components < 3);
4305       /* take either xz or just x since this is promoted to 2D from 1D */
4306       uint32_t mask = num_components == 2 ? (1|4) : 1;
4307       nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
4308       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
4309    }
4310    return true;
4311 }
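
/* e.g. a 1D shadow sample with coord x becomes a 2D sample with coord
 * (x, 0), and a 1D-array coord (x, layer) becomes (x, 0, layer)
 */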
4312
4313 static bool
4314 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4315 {
4316    bool found = false;
4317    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4318       const struct glsl_type *type = glsl_without_array(var->type);
4319       unsigned length = glsl_get_length(var->type);
4320       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4321          continue;
4322       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4323       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4324
4325       found = true;
4326    }
4327    if (found)
4328       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4329    return found;
4330 }
4331
4332 static void
4333 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4334 {
4335    nir_foreach_function(function, shader) {
4336       if (!function->impl)
4337          continue;
4338       nir_foreach_block_safe(block, function->impl) {
4339          nir_foreach_instr_safe(instr, block) {
4340             if (instr->type == nir_instr_type_tex) {
4341                nir_tex_instr *tex = nir_instr_as_tex(instr);
4342                zs->sinfo.have_sparse |= tex->is_sparse;
4343             }
4344             if (instr->type != nir_instr_type_intrinsic)
4345                continue;
4346             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4347             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4348                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4349                 intr->intrinsic == nir_intrinsic_image_deref_store ||
4350                 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4351                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4352                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4353                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4354                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4355                 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4356                 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4357                 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4358                 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4359                 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4360                 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4361                 intr->intrinsic == nir_intrinsic_image_deref_size ||
4362                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4363                 intr->intrinsic == nir_intrinsic_image_deref_format ||
4364                 intr->intrinsic == nir_intrinsic_image_deref_order) {
4365
4366                 nir_variable *var =
4367                    nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
4368
4369                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4370                 const unsigned size =
4371                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4372                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4373                                  var->data.binding + (MAX2(size, 1) - 1));
4374             }
4375             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4376                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4377                zs->sinfo.have_sparse = true;
4378
4379             static bool warned = false;
4380             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4381                switch (intr->intrinsic) {
4382                case nir_intrinsic_image_deref_atomic_add: {
4383                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
4384                   if (util_format_is_float(var->data.image.format)) {
4385                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                           warned = true; /* only warn once */
                        }
4386                   break;
4387                }
4388                default:
4389                   break;
4390                }
4391             }
4392          }
4393       }
4394    }
4395 }
4396
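/* walk up the ssa chain through alu ops (always via src[0]): a raw residency
 * code ultimately comes from a sparse tex instr, while an already-converted
 * value comes from an is_sparse_texels_resident intrinsic
 */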
4397 static bool
4398 is_residency_code(nir_ssa_def *src)
4399 {
4400    nir_instr *parent = src->parent_instr;
4401    while (1) {
4402       if (parent->type == nir_instr_type_intrinsic) {
4403          ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4404          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4405          return false;
4406       }
4407       if (parent->type == nir_instr_type_tex)
4408          return true;
4409       assert(parent->type == nir_instr_type_alu);
4410       nir_alu_instr *alu = nir_instr_as_alu(parent);
4411       parent = alu->src[0].src.ssa->parent_instr;
4412    }
4413 }
4414
4415 static bool
4416 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
4417 {
4418    if (in->type != nir_instr_type_intrinsic)
4419       return false;
4420    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4421    if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
4422       b->cursor = nir_before_instr(&instr->instr);
4423       nir_ssa_def *src0;
4424       if (is_residency_code(instr->src[0].ssa))
4425          src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
4426       else
4427          src0 = instr->src[0].ssa;
4428       nir_ssa_def *src1;
4429       if (is_residency_code(instr->src[1].ssa))
4430          src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
4431       else
4432          src1 = instr->src[1].ssa;
4433       nir_ssa_def *def = nir_iand(b, src0, src1);
4434       nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
4435       nir_instr_remove(in);
4436       return true;
4437    }
4438    if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
4439       return false;
4440
4441    /* a vulkan vector can only go up to vec4, but this is (maybe) a vec5,
4442     * so just rewrite it as the first component since ntv is going to use a
4443     * different method for storing the residency value anyway
4444     */
4445    b->cursor = nir_before_instr(&instr->instr);
4446    nir_instr *parent = instr->src[0].ssa->parent_instr;
4447    if (is_residency_code(instr->src[0].ssa)) {
4448       assert(parent->type == nir_instr_type_alu);
4449       nir_alu_instr *alu = nir_instr_as_alu(parent);
4450       nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
4451       nir_instr_remove(parent);
4452    } else {
4453       nir_ssa_def *src;
4454       if (parent->type == nir_instr_type_intrinsic) {
4455          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4456          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4457          src = intr->src[0].ssa;
4458       } else {
4459          assert(parent->type == nir_instr_type_alu);
4460          nir_alu_instr *alu = nir_instr_as_alu(parent);
4461          src = alu->src[0].src.ssa;
4462       }
4463       if (instr->dest.ssa.bit_size != 32) {
4464          if (instr->dest.ssa.bit_size == 1)
4465             src = nir_ieq_imm(b, src, 1);
4466          else
4467             src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
4468       }
4469       nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
4470       nir_instr_remove(in);
4471    }
4472    return true;
4473 }
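
/* e.g. sparse_residency_code_and(code_a, code_b) becomes
 * iand(is_sparse_texels_resident(code_a), is_sparse_texels_resident(code_b)),
 * so only is_sparse_texels_resident intrinsics ever reach ntv
 */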
4474
4475 static bool
4476 lower_sparse(nir_shader *shader)
4477 {
4478    return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
4479 }
4480
4481 static bool
4482 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4483 {
4484    if (in->type != nir_instr_type_tex)
4485       return false;
4486    nir_tex_instr *tex = nir_instr_as_tex(in);
4487    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4488       return false;
4489    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4490    nir_variable *var = NULL;
4491    if (handle != -1) {
4492       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4493    } else {
4494       nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4495          if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4496             unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4497             if (tex->texture_index >= img->data.driver_location &&
4498                 tex->texture_index < img->data.driver_location + size) {
4499                var = img;
4500                break;
4501             }
4502          }
4503       }
4504    }
4505    return !!rewrite_tex_dest(b, tex, var, data);
4506 }
4507
4508 static bool
4509 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4510 {
4511    return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
4512 }
4513
4514 static bool
4515 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4516 {
4517    if (in->type != nir_instr_type_alu)
4518       return false;
4519    nir_alu_instr *alu = nir_instr_as_alu(in);
4520    switch (alu->op) {
4521    case nir_op_ubitfield_extract:
4522    case nir_op_ibitfield_extract:
4523    case nir_op_bitfield_insert:
4524       break;
4525    default:
4526       return false;
4527    }
4528    unsigned num_components = nir_dest_num_components(alu->dest.dest);
4529    if (num_components == 1)
4530       return false;
4531    b->cursor = nir_before_instr(in);
4532    nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
4533    for (unsigned i = 0; i < num_components; i++) {
4534       if (alu->op == nir_op_bitfield_insert)
4535          dests[i] = nir_bitfield_insert(b,
4536                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4537                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4538                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4539                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4540       else if (alu->op == nir_op_ubitfield_extract)
4541          dests[i] = nir_ubitfield_extract(b,
4542                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4543                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4544                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4545       else
4546          dests[i] = nir_ibitfield_extract(b,
4547                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4548                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4549                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4550    }
4551    nir_ssa_def *dest = nir_vec(b, dests, num_components);
4552    nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
4553    nir_instr_remove(in);
4554    return true;
4555 }
4556
4557
4558 static bool
4559 split_bitfields(nir_shader *shader)
4560 {
4561    return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
4562 }
4563
4564 static void
4565 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4566 {
4567    nir_foreach_function(function, nir) {
4568       nir_foreach_block(block, function->impl) {
4569          nir_foreach_instr_safe(instr, block) {
4570             if (instr->type != nir_instr_type_deref)
4571                continue;
4572             nir_deref_instr *deref = nir_instr_as_deref(instr);
4573             nir_variable *img = nir_deref_instr_get_variable(deref);
4574             if (img != var)
4575                continue;
4576             if (glsl_type_is_array(var->type)) {
4577                if (deref->deref_type == nir_deref_type_array)
4578                   deref->type = glsl_without_array(var->type);
4579                else
4580                   deref->type = var->type;
4581             } else {
4582                deref->type = var->type;
4583             }
4584          }
4585       }
4586    }
4587 }
4588
4589 static void
4590 type_image(nir_shader *nir, nir_variable *var)
4591 {
4592    nir_foreach_function(function, nir) {
4593       nir_foreach_block(block, function->impl) {
4594          nir_foreach_instr_safe(instr, block) {
4595             if (instr->type != nir_instr_type_intrinsic)
4596                continue;
4597             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4598             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4599                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4600                intr->intrinsic == nir_intrinsic_image_deref_store ||
4601                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4602                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4603                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4604                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4605                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4606                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4607                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4608                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4609                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4610                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4611                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4612                intr->intrinsic == nir_intrinsic_image_deref_samples ||
4613                intr->intrinsic == nir_intrinsic_image_deref_format ||
4614                intr->intrinsic == nir_intrinsic_image_deref_order) {
4615                nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4616                nir_variable *img = nir_deref_instr_get_variable(deref);
4617                if (img != var)
4618                   continue;
4619                nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4620                const struct glsl_type *type = glsl_without_array(var->type);
4621                if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4622                   assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4623                   continue;
4624                }
4625                const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4626                if (glsl_type_is_array(var->type))
4627                   img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4628                var->type = img_type;
4629                rewrite_cl_derefs(nir, var);
4630                return;
4631             }
4632          }
4633       }
4634    }
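   /* fallback: the only remaining uses are image_deref_size, which carries no
    * src type, so assume uint32
    */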
4635    nir_foreach_function(function, nir) {
4636       nir_foreach_block(block, function->impl) {
4637          nir_foreach_instr_safe(instr, block) {
4638             if (instr->type != nir_instr_type_intrinsic)
4639                continue;
4640             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4641             if (intr->intrinsic != nir_intrinsic_image_deref_size)
4642                continue;
4643             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4644             nir_variable *img = nir_deref_instr_get_variable(deref);
4645             if (img != var)
4646                continue;
4647             nir_alu_type alu_type = nir_type_uint32;
4648             const struct glsl_type *type = glsl_without_array(var->type);
4649             if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4650                continue;
4651             }
4652             const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4653             if (glsl_type_is_array(var->type))
4654                img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4655             var->type = img_type;
4656             rewrite_cl_derefs(nir, var);
4657             return;
4658          }
4659       }
4660    }
        /* no typed access was found: the image is unused, so delete it */
4661    var->data.mode = nir_var_shader_temp;
4662 }
4663
4664 static nir_variable *
4665 find_sampler_var(nir_shader *nir, unsigned texture_index)
4666 {
4667    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4668       unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4669       if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
4670           (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
4671          return var;
4672    }
4673    return NULL;
4674 }
4675
4676 static bool
4677 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4678 {
4679    bool progress = false;
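   /* first pass: type samplers from real sampling ops, which carry a dest type */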
4680    nir_foreach_function(function, nir) {
4681       nir_foreach_block(block, function->impl) {
4682          nir_foreach_instr(instr, block) {
4683             if (instr->type != nir_instr_type_tex)
4684                continue;
4685             nir_tex_instr *tex = nir_instr_as_tex(instr);
4686             switch (tex->op) {
4687             case nir_texop_lod:
4688             case nir_texop_txs:
4689             case nir_texop_query_levels:
4690             case nir_texop_texture_samples:
4691             case nir_texop_samples_identical:
4692                continue;
4693             default:
4694                break;
4695             }
4696             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4697             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4698             assert(var);
4699             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4700                continue;
4701             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4702             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4703             if (size > 1)
4704                img_type = glsl_array_type(img_type, size, 0);
4705             var->type = img_type;
4706             progress = true;
4707          }
4708       }
4709    }
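   /* second pass: samplers only referenced by query ops (txs/lod/etc) get
    * typed the same way
    */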
4710    nir_foreach_function(function, nir) {
4711       nir_foreach_block(block, function->impl) {
4712          nir_foreach_instr(instr, block) {
4713             if (instr->type != nir_instr_type_tex)
4714                continue;
4715             nir_tex_instr *tex = nir_instr_as_tex(instr);
4716             switch (tex->op) {
4717             case nir_texop_lod:
4718             case nir_texop_txs:
4719             case nir_texop_query_levels:
4720             case nir_texop_texture_samples:
4721             case nir_texop_samples_identical:
4722                break;
4723             default:
4724                continue;
4725             }
4726             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4727             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4728             assert(var);
4729             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4730                continue;
4731             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4732             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4733             if (size > 1)
4734                img_type = glsl_array_type(img_type, size, 0);
4735             var->type = img_type;
4736             progress = true;
4737          }
4738       }
4739    }
4740    return progress;
4741 }
4742
4743 static bool
4744 delete_samplers(nir_shader *nir)
4745 {
4746    bool progress = false;
4747    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4748       if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4749          var->data.mode = nir_var_shader_temp;
4750          progress = true;
4751       }
4752    }
4753    return progress;
4754 }
4755
4756 static bool
4757 type_images(nir_shader *nir, unsigned *sampler_mask)
4758 {
4759    bool progress = false;
4760    progress |= delete_samplers(nir);
4761    progress |= type_sampler_vars(nir, sampler_mask);
4762    nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4763       type_image(nir, var);
4764       progress = true;
4765    }
4766    return progress;
4767 }
4768
4769 /* attempt to assign io for separate shaders */
4770 static bool
4771 fixup_io_locations(nir_shader *nir)
4772 {
4773    nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4774    /* i/o interface blocks are required to be EXACT matches between stages:
4775     * iterate over all locations and set locations incrementally
4776     */
4777    unsigned slot = 0;
4778    for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4779       if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4780          continue;
4781       nir_variable *var = nir_find_variable_with_location(nir, mode, i);
4782       if (!var) {
4783          /* locations used between stages are not required to be contiguous */
4784          if (i >= VARYING_SLOT_VAR0)
4785             slot++;
4786          continue;
4787       }
4788       unsigned size;
4789       /* ensure variable is given enough slots */
4790       if (nir_is_arrayed_io(var, nir->info.stage))
4791          size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4792       else
4793          size = glsl_count_vec4_slots(var->type, false, false);
4794       var->data.driver_location = slot;
4795       slot += size;
4796       /* skip the slots this variable consumed so they aren't counted twice */
4797       i += size - 1;
4798    }
4799    return true;
4800 }
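
/* e.g. if a stage's interface has POS, VAR0 and VAR2, both the producer and
 * the consumer walk the slots identically: POS -> 0, VAR0 -> 1, the missing
 * VAR1 still burns slot 2, and VAR2 -> 3, so the interfaces match without
 * either side ever seeing the other's variables
 */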
4801
4802 static uint32_t
4803 zink_flat_flags(struct nir_shader *shader)
4804 {
4805    uint32_t flat_flags = 0, c = 0;
4806    nir_foreach_shader_in_variable(var, shader) {
4807       if (var->data.interpolation == INTERP_MODE_FLAT)
4808          flat_flags |= 1u << (c++);
4809    }
4810
4811    return flat_flags;
4812 }
4813
4814 struct zink_shader *
4815 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4816                    const struct pipe_stream_output_info *so_info)
4817 {
4818    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4819    bool have_psiz = false;
4820
4821    ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4822                         nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4823
4824    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4825    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4826
4827    util_queue_fence_init(&ret->precompile.fence);
4828    util_dynarray_init(&ret->pipeline_libs, ret);
4829    ret->hash = _mesa_hash_pointer(ret);
4830
4831    ret->programs = _mesa_pointer_set_create(NULL);
4832    simple_mtx_init(&ret->lock, mtx_plain);
4833
4834    nir_variable_mode indirect_derefs_modes = 0;
4835    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4836        nir->info.stage == MESA_SHADER_TESS_EVAL)
4837       indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4838
4839    NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
4840               UINT32_MAX);
4841
4842    if (nir->info.stage < MESA_SHADER_COMPUTE)
4843       create_gfx_pushconst(nir);
4844
4845    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4846             nir->info.stage == MESA_SHADER_TESS_EVAL)
4847       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4848
4849    if (nir->info.stage < MESA_SHADER_FRAGMENT)
4850       have_psiz = check_psiz(nir);
4851    if (nir->info.stage == MESA_SHADER_FRAGMENT)
4852       ret->flat_flags = zink_flat_flags(nir);
4853
4854    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4855       NIR_PASS_V(nir, fixup_io_locations);
4856
4857    NIR_PASS_V(nir, lower_basevertex);
4858    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4859    NIR_PASS_V(nir, lower_baseinstance);
4860    NIR_PASS_V(nir, lower_sparse);
4861    NIR_PASS_V(nir, split_bitfields);
4862    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4863
4864    if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4865       NIR_PASS_V(nir, nir_lower_discard_or_demote,
4866                  screen->driconf.glsl_correct_derivatives_after_discard ||
4867                  nir->info.use_legacy_math_rules);
4868    }
4869
4870    if (screen->need_2D_zs)
4871       NIR_PASS_V(nir, lower_1d_shadow, screen);
4872
4873    {
4874       nir_lower_subgroups_options subgroup_options = {0};
4875       subgroup_options.lower_to_scalar = true;
4876       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4877       subgroup_options.ballot_bit_size = 32;
4878       subgroup_options.ballot_components = 4;
4879       subgroup_options.lower_subgroup_masks = true;
4880       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4881          subgroup_options.subgroup_size = 1;
4882          subgroup_options.lower_vote_trivial = true;
4883       }
4884       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4885    }
4886
4887    if (so_info && so_info->num_outputs)
4888       NIR_PASS_V(nir, split_blocks);
4889
4890    optimize_nir(nir, NULL);
4891    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4892    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4893                                           nir_lower_demote_if_to_cf |
4894                                           nir_lower_terminate_if_to_cf));
4895    NIR_PASS_V(nir, nir_lower_fragcolor,
4896          nir->info.fs.color_is_dual_source ? 1 : 8);
4897    NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4898    bool needs_size = analyze_io(ret, nir);
4899    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4900    /* if uniforms may be inlined, defer these passes to compile time, when
4901     * the inlined values are known */
4901    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4902       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
4903       NIR_PASS_V(nir, rewrite_bo_access, screen);
4904       NIR_PASS_V(nir, remove_bo_access, ret);
4905    }
4906
4907    if (zink_debug & ZINK_DEBUG_NIR) {
4908       fprintf(stderr, "NIR shader:\n---8<---\n");
4909       nir_print_shader(nir, stderr);
4910       fprintf(stderr, "---8<---\n");
4911    }
4912
4913    struct zink_bindless_info bindless = {0};
4914    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4915    bool has_bindless_io = false;
4916    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4917       var->data.is_xfb = false;
4918       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4919          has_bindless_io = true;
4920          break;
4921       }
4922    }
4923    if (has_bindless_io)
4924       NIR_PASS_V(nir, lower_bindless_io);
4925
4926    optimize_nir(nir, NULL);
4927    prune_io(nir);
4928
4929    scan_nir(screen, nir, ret);
4930    unsigned sampler_mask = 0;
4931    if (nir->info.stage == MESA_SHADER_KERNEL) {
4932       NIR_PASS_V(nir, type_images, &sampler_mask);
4933       enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4934       VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4935       u_foreach_bit(s, sampler_mask) {
4936          ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4937          ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4938          ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4939          ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4940          ret->num_bindings[ztype]++;
4941       }
4942       ret->sinfo.sampler_mask = sampler_mask;
4943    }
4944
4945    unsigned ubo_binding_mask = 0;
4946    unsigned ssbo_binding_mask = 0;
4947    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4948       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4949                                         nir_var_image |
4950                                         nir_var_mem_ubo |
4951                                         nir_var_mem_ssbo)) {
4952          enum zink_descriptor_type ztype;
4953          const struct glsl_type *type = glsl_without_array(var->type);
4954          if (var->data.mode == nir_var_mem_ubo) {
4955             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4956             /* buffer 0 is a push descriptor */
4957             var->data.descriptor_set = !!var->data.driver_location;
4958             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
4959                                 zink_binding(nir->info.stage,
4960                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
4961                                              var->data.driver_location,
4962                                              screen->compact_descriptors);
4963             assert(var->data.driver_location || var->data.binding < 10);
4964             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
4965             int binding = var->data.binding;
4966
4967             if (!var->data.driver_location) {
4968                ret->has_uniforms = true;
4969             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
4970                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4971                ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
4972                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4973                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4974                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4975                ret->num_bindings[ztype]++;
4976                ubo_binding_mask |= BITFIELD_BIT(binding);
4977             }
4978          } else if (var->data.mode == nir_var_mem_ssbo) {
4979             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
4980             var->data.descriptor_set = screen->desc_set_id[ztype];
4981             var->data.binding = zink_binding(nir->info.stage,
4982                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4983                                              var->data.driver_location,
4984                                              screen->compact_descriptors);
4985             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
4986                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4987                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4988                ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
4989                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4990                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4991                ret->num_bindings[ztype]++;
4992                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
4993             }
4994          } else {
4995             assert(var->data.mode == nir_var_uniform ||
4996                    var->data.mode == nir_var_image);
4997             if (var->data.bindless) {
4998                ret->bindless = true;
4999                handle_bindless_var(nir, var, type, &bindless);
5000             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
5001                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
5002                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
5003                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
5004                ztype = zink_desc_type_from_vktype(vktype);
5005                if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
5006                   ret->num_texel_buffers++;
5007                var->data.driver_location = var->data.binding;
5008                var->data.descriptor_set = screen->desc_set_id[ztype];
5009                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
5010                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
5011                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
5012                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
5013                if (glsl_type_is_array(var->type))
5014                   ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
5015                else
5016                   ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
5017                ret->num_bindings[ztype]++;
5018             } else if (var->data.mode == nir_var_uniform) {
5019                /* this is a dead uniform */
5020                var->data.mode = 0;
5021                exec_node_remove(&var->node);
5022             }
5023          }
5024       }
5025    }
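   /* with all regular descriptor bindings assigned above, bindless
    * texture/image access (if any) can now be lowered to target the
    * bindless descriptor set */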
5026    bool bindless_lowered = false;
5027    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
5028    ret->bindless |= bindless_lowered;
5029
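   /* lower 64-bit variables when the device lacks full 64-bit int/float
    * support so nothing unsupported reaches SPIR-V emission */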
5030    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
5031       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
5032    if (nir->info.stage != MESA_SHADER_KERNEL)
5033       NIR_PASS_V(nir, match_tex_dests, ret);
5034
5035    if (!nir->info.internal)
5036       nir_foreach_shader_out_variable(var, nir)
5037          var->data.explicit_xfb_buffer = 0;
5038    if (so_info && so_info->num_outputs)
5039       update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
5040    else if (have_psiz) {
5041       bool have_fake_psiz = false;
5042       nir_variable *psiz = NULL;
5043       nir_foreach_shader_out_variable(var, nir) {
5044          if (var->data.location == VARYING_SLOT_PSIZ) {
5045             if (!var->data.explicit_location)
5046                have_fake_psiz = true;
5047             else
5048                psiz = var;
5049          }
5050       }
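      /* a driver-added (fake) psiz and an explicitly-located one can coexist;
       * in that case drop the explicit variable so only a single psiz output
       * remains */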
5051       if (have_fake_psiz && psiz) {
5052          psiz->data.mode = nir_var_shader_temp;
5053          nir_fixup_deref_modes(nir);
5054          NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
5055       }
5056    }
5057    zink_shader_serialize_blob(nir, &ret->blob);
5058    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5059
5060    ret->can_inline = true;
5061
5062    return ret;
5063 }
5064
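/* per-shader finalization hook (hedged: assumed from context to be wired up
 * as the screen's finalize_nir callback): lowers texture projection/gather
 * forms zink can't emit directly, optimizes, and regathers shader info;
 * returns NULL since there is never an error string to report */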
5065 char *
5066 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
5067 {
5068    struct zink_screen *screen = zink_screen(pscreen);
5069    nir_shader *nir = nirptr;
5070
5071    nir_lower_tex_options tex_opts = {
5072       .lower_invalid_implicit_lod = true,
5073    };
5074    /*
5075       Sampled Image must be an object whose type is OpTypeSampledImage.
5076       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
5077       or Rect, and the Arrayed and MS operands must be 0.
5078       - SPIR-V spec, OpImageSampleProj* opcodes
5079     */
5080    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
5081                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
5082    tex_opts.lower_txp_array = true;
5083    if (!screen->info.feats.features.shaderImageGatherExtended)
5084       tex_opts.lower_tg4_offsets = true;
5085    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
5086    optimize_nir(nir, NULL);
5087    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
5088    if (screen->driconf.inline_uniforms)
5089       nir_find_inlinable_uniforms(nir);
5090
5091    return NULL;
5092 }
5093
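/* releases the vulkan objects and CPU-side storage backing a shader; gfx
 * shaders normally arrive here via zink_gfx_shader_free() below so their
 * program links are severed first */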
5094 void
5095 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5096 {
5097    _mesa_set_destroy(shader->programs, NULL);
5098    util_queue_fence_wait(&shader->precompile.fence);
5099    util_queue_fence_destroy(&shader->precompile.fence);
5100    zink_descriptor_shader_deinit(screen, shader);
5101    if (screen->info.have_EXT_shader_object) {
5102       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
5103    } else {
5104       if (shader->precompile.obj.mod)
5105          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
5106       if (shader->precompile.gpl)
5107          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
5108    }
5109    blob_finish(&shader->blob);
5110    ralloc_free(shader->spirv);
5111    free(shader->precompile.bindings);
5112    ralloc_free(shader);
5113 }
5114
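/* detaches a gfx shader from every program still referencing it, evicting
 * those programs and their pipeline libs from the relevant caches, then
 * recursively frees any generated tcs/gs shaders before freeing the shader
 * itself */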
5115 void
5116 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5117 {
5118    assert(shader->info.stage != MESA_SHADER_COMPUTE);
5119    util_queue_fence_wait(&shader->precompile.fence);
5120    set_foreach(shader->programs, entry) {
5121       struct zink_gfx_program *prog = (void*)entry->key;
5122       gl_shader_stage stage = shader->info.stage;
5123       assert(stage < ZINK_GFX_SHADER_COUNT);
5124       unsigned stages_present = prog->stages_present;
5125       if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
5126             prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
5127          stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
5128       unsigned idx = zink_program_cache_stages(stages_present);
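      /* the program cache is keyed on the shaders array, so the program must
       * be evicted before any of its shader slots is nulled; the fence waits
       * below keep background (pre)compiles from racing the teardown */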
5129       if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
5130           (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
5131          struct hash_table *ht = &prog->ctx->program_cache[idx];
5132          simple_mtx_lock(&prog->ctx->program_lock[idx]);
5133          struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
5134          assert(he && he->data == prog);
5135          _mesa_hash_table_remove(ht, he);
5136          prog->base.removed = true;
5137          simple_mtx_unlock(&prog->ctx->program_lock[idx]);
5138          util_queue_fence_wait(&prog->base.cache_fence);
5139
5140          for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
5141             for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
5142                hash_table_foreach(&prog->pipelines[r][i], entry) {
5143                   struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
5144
5145                   util_queue_fence_wait(&pc_entry->fence);
5146                }
5147             }
5148          }
5149
5150       }
5151       while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
5152          struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
5153          //skip any lib that lacks a stage we have: continue unless every bit
5154          //set in stages_present is also set in libs->stages_present
5155          if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
5156             continue;
5157          if (!libs->removed) {
5158             libs->removed = true;
5159             simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
5160             _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
5161             simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
5162          }
5163          zink_gfx_lib_cache_unref(screen, libs);
5164       }
5165       if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
5166          prog->shaders[stage] = NULL;
5167          prog->stages_remaining &= ~BITFIELD_BIT(stage);
5168       }
5169       /* only remove generated tcs during parent tes destruction */
5170       if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
5171          prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
5172       if (stage != MESA_SHADER_FRAGMENT &&
5173           prog->shaders[MESA_SHADER_GEOMETRY] &&
5174           prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
5175           shader) {
5176          prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
5177       }
5178       zink_gfx_program_reference(screen, &prog, NULL);
5179    }
5180    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
5181        shader->non_fs.generated_tcs) {
5182       /* automatically destroy generated tcs shaders when tes is destroyed */
5183       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
5184       shader->non_fs.generated_tcs = NULL;
5185    }
5186    for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
5187       for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
5188          if (shader->info.stage != MESA_SHADER_FRAGMENT &&
5189              shader->non_fs.generated_gs[i][j]) {
5190             /* automatically destroy generated gs shaders when owner is destroyed */
5191             zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
5192             shader->non_fs.generated_gs[i][j] = NULL;
5193          }
5194       }
5195    }
5196    zink_shader_free(screen, shader);
5197 }
5198
5199
5200 struct zink_shader_object
5201 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
5202 {
5203    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
5204    /* shortcut all the nir passes since we just have to change this one word */
5205    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
5206    return zink_shader_spirv_compile(screen, zs, NULL, false);
5207 }
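
/* a minimal usage sketch (caller-side variable names are illustrative, not
 * actual zink API): because only the OpExecutionMode operand changes, the
 * generated tcs can be recompiled cheaply whenever the patch size changes:
 *
 *    struct zink_shader_object obj =
 *       zink_shader_tcs_compile(screen, generated_tcs, new_patch_vertices);
 */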
5208
5209 /* creates a passthrough tcs shader that is roughly equivalent to the following GLSL:
5210
5211 #version 150
5212 #extension GL_ARB_tessellation_shader : require
5213
5214 in vec4 some_var[gl_MaxPatchVertices];
5215 out vec4 some_var_out;
5216
5217 layout(push_constant) uniform tcsPushConstants {
5218     layout(offset = 0) float TessLevelInner[2];
5219     layout(offset = 8) float TessLevelOuter[4];
5220 } u_tcsPushConstants;
5221 layout(vertices = $vertices_per_patch) out;
5222 void main()
5223 {
5224   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
5225   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
5226   some_var_out = some_var[gl_InvocationID];
5227 }
5228
5229 */
5230 struct zink_shader *
5231 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
5232 {
5233    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5234    util_queue_fence_init(&ret->precompile.fence);
5235    ret->hash = _mesa_hash_pointer(ret);
5236    ret->programs = _mesa_pointer_set_create(NULL);
5237    simple_mtx_init(&ret->lock, mtx_plain);
5238
5239    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
5240    nir_function *fn = nir_function_create(nir, "main");
5241    fn->is_entrypoint = true;
5242    nir_function_impl *impl = nir_function_impl_create(fn);
5243
5244    nir_builder b;
5245    nir_builder_init(&b, impl);
5246    b.cursor = nir_before_block(nir_start_block(impl));
5247
5248    nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
5249
5250    nir_foreach_shader_in_variable(var, tes) {
5251       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
5252          continue;
5253       const struct glsl_type *in_type = var->type;
5254       const struct glsl_type *out_type = var->type;
5255       char buf[1024];
5256       snprintf(buf, sizeof(buf), "%s_out", var->name);
5257       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
5258          const struct glsl_type *type = var->type;
5259          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
5260          out_type = glsl_array_type(type, vertices_per_patch, 0);
5261       }
5262
5263       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
5264       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
5265       out->data.location = in->data.location = var->data.location;
5266       out->data.location_frac = in->data.location_frac = var->data.location_frac;
5267
5268       /* gl_in[] receives values from equivalent built-in output
5269          variables written by the vertex shader (section 2.14.7).  Each array
5270          element of gl_in[] is a structure holding values for a specific vertex of
5271          the input patch.  The length of gl_in[] is equal to the
5272          implementation-dependent maximum patch size (gl_MaxPatchVertices).
5273          - ARB_tessellation_shader
5274        */
5275       /* we need to load the invocation-specific value of the vertex output and then store it to the matching element of the arrayed (per-vertex) tcs output */
5276       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
5277       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
5278       copy_vars(&b, out_value, in_value);
5279    }
5280    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
5281    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
5282    gl_TessLevelInner->data.patch = 1;
5283    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
5284    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
5285    gl_TessLevelOuter->data.patch = 1;
5286
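   /* the passthrough tcs computes no tess levels of its own, so read the
    * defaults the driver supplies through the gfx push constants */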
5287    create_gfx_pushconst(nir);
5288
5289    nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
5290                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
5291                                                     .base = 1, .range = 8);
5292    nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
5293                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
5294                                                     .base = 2, .range = 16);
5295
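   /* the loads above are vec2/vec4 while the builtins are float arrays, so
    * scatter one channel into each array element */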
5296    for (unsigned i = 0; i < 2; i++) {
5297       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
5298       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
5299    }
5300    for (unsigned i = 0; i < 4; i++) {
5301       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
5302       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
5303    }
5304
5305    nir->info.tess.tcs_vertices_out = vertices_per_patch;
5306    nir_validate_shader(nir, "created");
5307
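   /* standard cleanup so the hand-built shader looks like any other shader
    * entering the backend */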
5308    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
5309    optimize_nir(nir, NULL);
5310    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5311    NIR_PASS_V(nir, nir_convert_from_ssa, true);
5312
5313    *nir_ret = nir;
5314    zink_shader_serialize_blob(nir, &ret->blob);
5315    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5316    ret->non_fs.is_generated = true;
5317    return ret;
5318 }
5319
5320 bool
5321 zink_shader_has_cubes(nir_shader *nir)
5322 {
5323    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
5324       const struct glsl_type *type = glsl_without_array(var->type);
5325       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
5326          return true;
5327    }
5328    return false;
5329 }
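
/* hedged usage sketch ("s" and the mask value are illustrative): this lets
 * callers skip cube-related lowering entirely when no cube sampler exists:
 *
 *    if (zink_shader_has_cubes(s))
 *       zink_lower_cubemap_to_array(s, nonseamless_cube_mask);
 */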
5330
5331 nir_shader *
5332 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
5333 {
5334    struct blob_reader blob_reader;
5335    blob_reader_init(&blob_reader, blob->data, blob->size);
5336    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
5337 }
5338
5339 nir_shader *
5340 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
5341 {
5342    return zink_shader_blob_deserialize(screen, &zs->blob);
5343 }
5344
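/* counterpart of the deserialize helpers above: a blob written here must
 * round-trip through zink_shader_blob_deserialize() unchanged */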
5345 void
5346 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
5347 {
5348    blob_init(blob);
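   /* debug builds strip names/debug info unless a zink debug flag means the
    * NIR may still be printed; release builds always keep it */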
5349 #ifndef NDEBUG
5350    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
5351 #else
5352    bool strip = false;
5353 #endif
5354    nir_serialize(blob, nir, strip);
5355 }
5356
5357 void
5358 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
5359 {
5360    nir_shader *nir = zink_shader_deserialize(screen, zs);
5361    nir_print_shader(nir, fp);
5362    ralloc_free(nir);
5363 }