ac765a14ac356d92f24cbfdf2668b0b5834eb264
[platform/upstream/mesa.git] / src / gallium / drivers / zink / zink_compiler.c
1 /*
2  * Copyright 2018 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23
24 #include "nir_opcodes.h"
25 #include "zink_context.h"
26 #include "zink_compiler.h"
27 #include "zink_descriptors.h"
28 #include "zink_program.h"
29 #include "zink_screen.h"
30 #include "nir_to_spirv/nir_to_spirv.h"
31
32 #include "pipe/p_state.h"
33
34 #include "nir.h"
35 #include "nir_xfb_info.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "compiler/nir/nir_serialize.h"
39 #include "compiler/nir/nir_builtin_builder.h"
40
41 #include "nir/tgsi_to_nir.h"
42 #include "tgsi/tgsi_dump.h"
43 #include "tgsi/tgsi_from_mesa.h"
44
45 #include "util/u_memory.h"
46
47 #include "compiler/spirv/nir_spirv.h"
48 #include "vulkan/util/vk_util.h"
49
50 bool
51 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
52
53
54 static void
55 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
56 {
57    assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
58    if (glsl_type_is_struct(dst->type)) {
59       for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
60          copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
61       }
62    } else if (glsl_type_is_array_or_matrix(dst->type)) {
63       unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
64       for (unsigned i = 0; i < count; i++) {
65          copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
66       }
67    } else {
68       nir_ssa_def *load = nir_load_deref(b, src);
69       nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
70    }
71 }
72
73 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
74
75 static void
76 create_gfx_pushconst(nir_shader *nir)
77 {
78 #define PUSHCONST_MEMBER(member_idx, field)                                                                     \
79 fields[member_idx].type =                                                                                       \
80    glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
81 fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
82 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
83
84    nir_variable *pushconst;
85    /* create compatible layout for the ntv push constant loader */
86    struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
87    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
88    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
89    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
90    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
91    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
92    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
93    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
94    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
95
96    pushconst = nir_variable_create(nir, nir_var_mem_push_const,
97                                    glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
98                                    "gfx_pushconst");
99    pushconst->data.location = INT_MAX; //doesn't really matter
100
101 #undef PUSHCONST_MEMBER
102 }
103
104 static bool
105 lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
106 {
107    if (instr->type != nir_instr_type_intrinsic)
108       return false;
109    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
110    if (intr->intrinsic != nir_intrinsic_load_deref)
111       return false;
112    nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
113    if (var->data.mode != nir_var_shader_in)
114       return false;
115    if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
116       return false;
117
118    /* create second variable for the split */
119    nir_variable *var2 = nir_variable_clone(var, b->shader);
120    /* split new variable into second slot */
121    var2->data.driver_location++;
122    nir_shader_add_variable(b->shader, var2);
123
124    unsigned total_num_components = glsl_get_vector_elements(var->type);
125    /* new variable is the second half of the dvec */
126    var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
127    /* clamp original variable to a dvec2 */
128    var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);
129
130    b->cursor = nir_after_instr(instr);
131
132    /* this is the first load instruction for the first half of the dvec3/4 components */
133    nir_ssa_def *load = nir_load_var(b, var);
134    /* this is the second load instruction for the second half of the dvec3/4 components */
135    nir_ssa_def *load2 = nir_load_var(b, var2);
136
137    nir_ssa_def *def[4];
138    /* create a new dvec3/4 comprised of all the loaded components from both variables */
139    def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
140    def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
141    def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
142    if (total_num_components == 4)
143       def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
144    nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
145    /* use the assembled dvec3/4 for all other uses of the load */
146    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
147                                   new_vec->parent_instr);
148
149    /* remove the original instr and its deref chain */
150    nir_instr *parent = intr->src[0].ssa->parent_instr;
151    nir_instr_remove(instr);
152    nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
153
154    return true;
155 }
156
157 /* mesa/gallium always provides UINT versions of 64bit formats:
158  * - rewrite loads as 32bit vec loads
159  * - cast back to 64bit
160  */
161 static bool
162 lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
163 {
164    if (instr->type != nir_instr_type_intrinsic)
165       return false;
166    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
167    if (intr->intrinsic != nir_intrinsic_load_deref)
168       return false;
169    nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
170    if (var->data.mode != nir_var_shader_in)
171       return false;
172    if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
173       return false;
174
175    unsigned num_components = glsl_get_vector_elements(var->type);
176    enum glsl_base_type base_type;
177    switch (glsl_get_base_type(var->type)) {
178    case GLSL_TYPE_UINT64:
179       base_type = GLSL_TYPE_UINT;
180       break;
181    case GLSL_TYPE_INT64:
182       base_type = GLSL_TYPE_INT;
183       break;
184    case GLSL_TYPE_DOUBLE:
185       base_type = GLSL_TYPE_FLOAT;
186       break;
187    default:
188       unreachable("unknown 64-bit vertex attribute format!");
189    }
190    var->type = glsl_vector_type(base_type, num_components * 2);
191
192    b->cursor = nir_after_instr(instr);
193
194    nir_ssa_def *load = nir_load_var(b, var);
195    nir_ssa_def *casted[2];
196    for (unsigned i = 0; i < num_components; i++)
197      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
198    nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));
199
200    /* remove the original instr and its deref chain */
201    nir_instr *parent = intr->src[0].ssa->parent_instr;
202    nir_instr_remove(instr);
203    nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
204
205    return true;
206 }
207
208 /* "64-bit three- and four-component vectors consume two consecutive locations."
209  *  - 14.1.4. Location Assignment
210  *
211  * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
212  * are assigned to consecutive locations, loaded separately, and then assembled back into a
213  * composite value that's used in place of the original loaded ssa src
214  */
215 static bool
216 lower_64bit_vertex_attribs(nir_shader *shader)
217 {
218    if (shader->info.stage != MESA_SHADER_VERTEX)
219       return false;
220
221    bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
222    progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
223    return progress;
224 }
225
226 static bool
227 lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
228 {
229    if (in->type != nir_instr_type_intrinsic)
230       return false;
231    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
232    if (instr->intrinsic != nir_intrinsic_load_base_vertex)
233       return false;
234
235    b->cursor = nir_after_instr(&instr->instr);
236    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
237    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
238    nir_intrinsic_set_range(load, 4);
239    load->num_components = 1;
240    nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
241    nir_builder_instr_insert(b, &load->instr);
242
243    nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
244                                           nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
245                                           &instr->dest.ssa,
246                                           nir_imm_int(b, 0),
247                                           NULL);
248
249    nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
250                                   composite->parent_instr);
251    return true;
252 }
253
254 static bool
255 lower_basevertex(nir_shader *shader)
256 {
257    if (shader->info.stage != MESA_SHADER_VERTEX)
258       return false;
259
260    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
261       return false;
262
263    return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
264 }
265
266
267 static bool
268 lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
269 {
270    if (in->type != nir_instr_type_intrinsic)
271       return false;
272    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
273    if (instr->intrinsic != nir_intrinsic_load_draw_id)
274       return false;
275
276    b->cursor = nir_before_instr(&instr->instr);
277    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
278    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
279    nir_intrinsic_set_range(load, 4);
280    load->num_components = 1;
281    nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
282    nir_builder_instr_insert(b, &load->instr);
283
284    nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
285
286    return true;
287 }
288
289 static bool
290 lower_drawid(nir_shader *shader)
291 {
292    if (shader->info.stage != MESA_SHADER_VERTEX)
293       return false;
294
295    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
296       return false;
297
298    return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
299 }
300
301 struct lower_gl_point_state {
302    nir_variable *gl_pos_out;
303    nir_variable *gl_point_size;
304 };
305
306 static bool
307 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
308 {
309    struct lower_gl_point_state *state = data;
310    nir_ssa_def *vp_scale, *pos;
311
312    if (instr->type != nir_instr_type_intrinsic)
313       return false;
314
315    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
316    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
317        intrin->intrinsic != nir_intrinsic_emit_vertex)
318       return false;
319
320    if (nir_intrinsic_stream_id(intrin) != 0)
321       return false;
322
323    if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
324          intrin->intrinsic == nir_intrinsic_end_primitive) {
325       nir_instr_remove(&intrin->instr);
326       return true;
327    }
328
329    b->cursor = nir_before_instr(instr);
330
331    // viewport-map endpoints
332    nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
333    vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);
334
335    // Load point info values
336    nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
337    nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);
338
339    // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
340    nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
341    w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
342    // halt_w_delta = w_delta / 2
343    nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));
344
345    // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
346    nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
347    h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
348    // halt_h_delta = h_delta / 2
349    nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));
350
351    nir_ssa_def *point_dir[4][2] = {
352       { nir_imm_float(b, -1), nir_imm_float(b, -1) },
353       { nir_imm_float(b, -1), nir_imm_float(b, 1) },
354       { nir_imm_float(b, 1), nir_imm_float(b, -1) },
355       { nir_imm_float(b, 1), nir_imm_float(b, 1) }
356    };
357
358    nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
359    nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);
360
361    for (size_t i = 0; i < 4; i++) {
362       pos = nir_vec4(b,
363                      nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
364                      nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
365                      nir_channel(b, point_pos, 2),
366                      nir_channel(b, point_pos, 3));
367
368       nir_store_var(b, state->gl_pos_out, pos, 0xf);
369
370       nir_emit_vertex(b);
371    }
372
373    nir_end_primitive(b);
374
375    nir_instr_remove(&intrin->instr);
376
377    return true;
378 }
379
380 static bool
381 lower_gl_point_gs(nir_shader *shader)
382 {
383    struct lower_gl_point_state state;
384    nir_builder b;
385
386    shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
387    shader->info.gs.vertices_out *= 4;
388
389    // Gets the gl_Position in and out
390    state.gl_pos_out =
391       nir_find_variable_with_location(shader, nir_var_shader_out,
392                                       VARYING_SLOT_POS);
393    state.gl_point_size =
394       nir_find_variable_with_location(shader, nir_var_shader_out,
395                                       VARYING_SLOT_PSIZ);
396
397    // if position in or gl_PointSize aren't written, we have nothing to do
398    if (!state.gl_pos_out || !state.gl_point_size)
399       return false;
400
401    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
402    nir_builder_init(&b, entry);
403    b.cursor = nir_before_cf_list(&entry->body);
404
405    return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
406                                        nir_metadata_dominance, &state);
407 }
408
409 struct lower_pv_mode_state {
410    nir_variable *varyings[VARYING_SLOT_MAX];
411    nir_variable *pos_counter;
412    nir_variable *out_pos_counter;
413    nir_variable *ring_offset;
414    unsigned ring_size;
415    unsigned primitive_vert_count;
416    unsigned prim;
417 };
418
419 static nir_ssa_def*
420 lower_pv_mode_gs_ring_index(nir_builder *b,
421                             struct lower_pv_mode_state *state,
422                             nir_ssa_def *index)
423 {
424    nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
425    return nir_imod(b, nir_iadd(b, index, ring_offset),
426                       nir_imm_int(b, state->ring_size));
427 }
428
429 static bool
430 lower_pv_mode_gs_store(nir_builder *b,
431                        nir_intrinsic_instr *intrin,
432                        struct lower_pv_mode_state *state)
433 {
434    b->cursor = nir_before_instr(&intrin->instr);
435    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
436    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
437       nir_variable *var = nir_deref_instr_get_variable(deref);
438
439       gl_varying_slot location = var->data.location;
440       assert(state->varyings[location]);
441       assert(intrin->src[1].is_ssa);
442       nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
443       nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
444       nir_store_array_var(b, state->varyings[location],
445                              index, intrin->src[1].ssa,
446                              nir_intrinsic_write_mask(intrin));
447       nir_instr_remove(&intrin->instr);
448       return true;
449    }
450
451    return false;
452 }
453
454 static void
455 lower_pv_mode_emit_rotated_prim(nir_builder *b,
456                                 struct lower_pv_mode_state *state,
457                                 nir_ssa_def *current_vertex)
458 {
459    nir_ssa_def *two = nir_imm_int(b, 2);
460    nir_ssa_def *three = nir_imm_int(b, 3);
461    bool is_triangle = state->primitive_vert_count == 3;
462    /* This shader will always see the last three vertices emitted by the user gs.
463     * The following table is used to to rotate primitives within a strip generated
464     * by the user gs such that the last vertex becomes the first.
465     *
466     * [lines, tris][even/odd index][vertex mod 3]
467     */
468    static const unsigned vert_maps[2][2][3] = {
469       {{1, 0, 0}, {1, 0, 0}},
470       {{2, 0, 1}, {2, 1, 0}}
471    };
472    /* When the primive supplied to the gs comes from a strip, the last provoking vertex
473     * is either the last or the second, depending on whether the triangle is at an odd
474     * or even position within the strip.
475     *
476     * odd or even primitive within draw
477     */
478    nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
479    for (unsigned i = 0; i < state->primitive_vert_count; i++) {
480       /* odd or even triangle within strip emitted by user GS
481        * this is handled using the table
482        */
483       nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
484       unsigned offset_even = vert_maps[is_triangle][0][i];
485       unsigned offset_odd = vert_maps[is_triangle][1][i];
486       nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
487       nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
488       nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
489                                             offset_odd_value, offset_even_value);
490       /* Here we account for how triangles are provided to the gs from a strip.
491        * For even primitives we rotate by 3, meaning we do nothing.
492        * For odd primitives we rotate by 2, combined with the previous rotation this
493        * means the second vertex becomes the last.
494        */
495       if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
496         rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
497                                             nir_isub(b, three,
498                                                         odd_prim)),
499                                             three);
500       /* Triangles that come from fans are provided to the gs the same way as
501        * odd triangles from a strip so always rotate by 2.
502        */
503       else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
504         rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
505                                 three);
506       rotated_i = nir_iadd(b, rotated_i, current_vertex);
507       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
508          gl_varying_slot location = var->data.location;
509          if (state->varyings[location]) {
510             nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
511             nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location]), index);
512             copy_vars(b, nir_build_deref_var(b, var), value);
513          }
514       }
515       nir_emit_vertex(b);
516    }
517 }
518
519 static bool
520 lower_pv_mode_gs_emit_vertex(nir_builder *b,
521                              nir_intrinsic_instr *intrin,
522                              struct lower_pv_mode_state *state)
523 {
524    b->cursor = nir_before_instr(&intrin->instr);
525
526    // increment pos_counter
527    nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
528    nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
529
530    nir_instr_remove(&intrin->instr);
531    return true;
532 }
533
534 static bool
535 lower_pv_mode_gs_end_primitive(nir_builder *b,
536                                nir_intrinsic_instr *intrin,
537                                struct lower_pv_mode_state *state)
538 {
539    b->cursor = nir_before_instr(&intrin->instr);
540
541    nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
542    nir_push_loop(b);
543    {
544       nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
545       nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
546                                 nir_imm_int(b, state->primitive_vert_count)));
547       nir_jump(b, nir_jump_break);
548       nir_pop_if(b, NULL);
549
550       lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
551       nir_end_primitive(b);
552
553       nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
554    }
555    nir_pop_loop(b, NULL);
556    /* Set the ring offset such that when position 0 is
557     * read we get the last value written
558     */
559    nir_store_var(b, state->ring_offset, pos_counter, 1);
560    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
561    nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
562
563    nir_instr_remove(&intrin->instr);
564    return true;
565 }
566
567 static bool
568 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
569 {
570    if (instr->type != nir_instr_type_intrinsic)
571       return false;
572
573    struct lower_pv_mode_state *state = data;
574    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
575
576    switch (intrin->intrinsic) {
577    case nir_intrinsic_store_deref:
578       return lower_pv_mode_gs_store(b, intrin, state);
579    case nir_intrinsic_copy_deref:
580       unreachable("should be lowered");
581    case nir_intrinsic_emit_vertex_with_counter:
582    case nir_intrinsic_emit_vertex:
583       return lower_pv_mode_gs_emit_vertex(b, intrin, state);
584    case nir_intrinsic_end_primitive:
585    case nir_intrinsic_end_primitive_with_counter:
586       return lower_pv_mode_gs_end_primitive(b, intrin, state);
587    default:
588       return false;
589    }
590 }
591
592 static unsigned int
593 lower_pv_mode_vertices_for_prim(enum shader_prim prim)
594 {
595    switch (prim) {
596    case SHADER_PRIM_POINTS:
597       return 1;
598    case SHADER_PRIM_LINE_STRIP:
599       return 2;
600    case SHADER_PRIM_TRIANGLE_STRIP:
601       return 3;
602    default:
603       unreachable("unsupported primitive for gs output");
604    }
605 }
606
607 static bool
608 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
609 {
610    nir_builder b;
611    struct lower_pv_mode_state state;
612    memset(state.varyings, 0, sizeof(state.varyings));
613
614    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
615    nir_builder_init(&b, entry);
616    b.cursor = nir_before_cf_list(&entry->body);
617
618    state.primitive_vert_count =
619       lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
620    state.ring_size = shader->info.gs.vertices_out;
621
622    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
623       gl_varying_slot location = var->data.location;
624
625       char name[100];
626       snprintf(name, sizeof(name), "__tmp_primverts_%d", location);
627       state.varyings[location] =
628          nir_local_variable_create(entry,
629                                    glsl_array_type(var->type,
630                                                    state.ring_size,
631                                                    false),
632                                    name);
633    }
634
635    state.pos_counter = nir_local_variable_create(entry,
636                                                  glsl_uint_type(),
637                                                  "__pos_counter");
638
639    state.out_pos_counter = nir_local_variable_create(entry,
640                                                      glsl_uint_type(),
641                                                      "__out_pos_counter");
642
643    state.ring_offset = nir_local_variable_create(entry,
644                                                  glsl_uint_type(),
645                                                  "__ring_offset");
646
647    state.prim = prim;
648
649    // initialize pos_counter and out_pos_counter
650    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
651    nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
652    nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
653
654    shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
655                                    (state.primitive_vert_count - 1)) *
656                                   state.primitive_vert_count;
657    return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
658                                        nir_metadata_dominance, &state);
659 }
660
661 struct lower_line_stipple_state {
662    nir_variable *pos_out;
663    nir_variable *stipple_out;
664    nir_variable *prev_pos;
665    nir_variable *pos_counter;
666    nir_variable *stipple_counter;
667    bool line_rectangular;
668 };
669
670 static nir_ssa_def *
671 viewport_map(nir_builder *b, nir_ssa_def *vert,
672              nir_ssa_def *scale)
673 {
674    nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
675    nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
676                                         w_recip);
677    return nir_fmul(b, ndc_point, scale);
678 }
679
680 static bool
681 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
682 {
683    struct lower_line_stipple_state *state = data;
684    if (instr->type != nir_instr_type_intrinsic)
685       return false;
686
687    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
688    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
689        intrin->intrinsic != nir_intrinsic_emit_vertex)
690       return false;
691
692    b->cursor = nir_before_instr(instr);
693
694    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
695    // viewport-map endpoints
696    nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
697                                                   nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
698                                                   .base = 1,
699                                                   .range = 2);
700    nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
701    nir_ssa_def *curr = nir_load_var(b, state->pos_out);
702    prev = viewport_map(b, prev, vp_scale);
703    curr = viewport_map(b, curr, vp_scale);
704
705    // calculate length of line
706    nir_ssa_def *len;
707    if (state->line_rectangular)
708       len = nir_fast_distance(b, prev, curr);
709    else {
710       nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
711       len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
712    }
713    // update stipple_counter
714    nir_store_var(b, state->stipple_counter,
715                     nir_fadd(b, nir_load_var(b, state->stipple_counter),
716                                 len), 1);
717    nir_pop_if(b, NULL);
718    // emit stipple out
719    nir_copy_var(b, state->stipple_out, state->stipple_counter);
720    nir_copy_var(b, state->prev_pos, state->pos_out);
721
722    // update prev_pos and pos_counter for next vertex
723    b->cursor = nir_after_instr(instr);
724    nir_store_var(b, state->pos_counter,
725                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
726                                     1), 1);
727
728    return true;
729 }
730
731 static bool
732 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
733 {
734    nir_builder b;
735    struct lower_line_stipple_state state;
736
737    state.pos_out =
738       nir_find_variable_with_location(shader, nir_var_shader_out,
739                                       VARYING_SLOT_POS);
740
741    // if position isn't written, we have nothing to do
742    if (!state.pos_out)
743       return false;
744
745    state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
746                                            glsl_float_type(),
747                                            "__stipple");
748    state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
749    state.stipple_out->data.driver_location = shader->num_outputs++;
750    state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
751    shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
752
753    // create temp variables
754    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
755                                         glsl_vec4_type(),
756                                         "__prev_pos");
757    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
758                                            glsl_uint_type(),
759                                            "__pos_counter");
760    state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
761                                                glsl_float_type(),
762                                                "__stipple_counter");
763
764    state.line_rectangular = line_rectangular;
765    // initialize pos_counter and stipple_counter
766    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
767    nir_builder_init(&b, entry);
768    b.cursor = nir_before_cf_list(&entry->body);
769    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
770    nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
771
772    return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
773                                        nir_metadata_dominance, &state);
774 }
775
776 static bool
777 lower_line_stipple_fs(nir_shader *shader)
778 {
779    nir_builder b;
780    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
781    nir_builder_init(&b, entry);
782
783    // create stipple counter
784    nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
785                                                glsl_float_type(),
786                                                "__stipple");
787    stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
788    stipple->data.driver_location = shader->num_inputs++;
789    stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
790    shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
791
792    nir_variable *sample_mask_out =
793       nir_find_variable_with_location(shader, nir_var_shader_out,
794                                       FRAG_RESULT_SAMPLE_MASK);
795    if (!sample_mask_out) {
796       sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
797                                         glsl_uint_type(), "sample_mask");
798       sample_mask_out->data.driver_location = shader->num_outputs++;
799       sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
800    }
801
802    b.cursor = nir_after_cf_list(&entry->body);
803
804    nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
805                                                  nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
806                                                  .base = 1);
807    nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
808    pattern = nir_iand_imm(&b, pattern, 0xffff);
809
810    nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
811    nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
812    nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
813    nir_store_var(&b, v, sample_mask_in, 1);
814    nir_store_var(&b, sample_mask, sample_mask_in, 1);
815    nir_push_loop(&b);
816    {
817       nir_ssa_def *value = nir_load_var(&b, v);
818       nir_ssa_def *index = nir_ufind_msb(&b, value);
819       nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
820       nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
821       nir_store_var(&b, v, new_value,  1);
822       nir_push_if(&b, nir_ieq_imm(&b, value, 0));
823       nir_jump(&b, nir_jump_break);
824       nir_pop_if(&b, NULL);
825
826       nir_ssa_def *stipple_pos =
827          nir_interp_deref_at_sample(&b, 1, 32,
828             &nir_build_deref_var(&b, stipple)->dest.ssa, index);
829       stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
830                                  nir_imm_float(&b, 16.0));
831       stipple_pos = nir_f2i32(&b, stipple_pos);
832       nir_ssa_def *bit =
833          nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
834       nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
835       {
836          nir_ssa_def *value = nir_load_var(&b, sample_mask);
837          value = nir_ixor(&b, value, index_mask);
838          nir_store_var(&b, sample_mask, value, 1);
839       }
840       nir_pop_if(&b, NULL);
841    }
842    nir_pop_loop(&b, NULL);
843    nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
844
845    return true;
846 }
847
848 struct lower_line_smooth_state {
849    nir_variable *pos_out;
850    nir_variable *line_coord_out;
851    nir_variable *prev_pos;
852    nir_variable *pos_counter;
853    nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
854                 *varyings[VARYING_SLOT_MAX][4]; // location_frac
855 };
856
857 static bool
858 lower_line_smooth_gs_store(nir_builder *b,
859                            nir_intrinsic_instr *intrin,
860                            struct lower_line_smooth_state *state)
861 {
862    b->cursor = nir_before_instr(&intrin->instr);
863    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
864    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
865       nir_variable *var = nir_deref_instr_get_variable(deref);
866
867       // we take care of position elsewhere
868       gl_varying_slot location = var->data.location;
869       unsigned location_frac = var->data.location_frac;
870       if (location != VARYING_SLOT_POS) {
871          assert(state->varyings[location]);
872          assert(intrin->src[1].is_ssa);
873          nir_store_var(b, state->varyings[location][location_frac],
874                        intrin->src[1].ssa,
875                        nir_intrinsic_write_mask(intrin));
876          nir_instr_remove(&intrin->instr);
877          return true;
878       }
879    }
880
881    return false;
882 }
883
884 static bool
885 lower_line_smooth_gs_emit_vertex(nir_builder *b,
886                                  nir_intrinsic_instr *intrin,
887                                  struct lower_line_smooth_state *state)
888 {
889    b->cursor = nir_before_instr(&intrin->instr);
890
891    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
892    nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
893                                                   nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
894                                                   .base = 1,
895                                                   .range = 2);
896    nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
897    nir_ssa_def *curr = nir_load_var(b, state->pos_out);
898    nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
899    nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);
900
901    nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
902                                                nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
903                                                .base = 1);
904    nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
905
906    const unsigned yx[2] = { 1, 0 };
907    nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
908    nir_ssa_def *len = nir_fast_length(b, vec);
909    nir_ssa_def *dir = nir_normalize(b, vec);
910    nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
911    half_length = nir_fadd_imm(b, half_length, 0.5);
912
913    nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
914    nir_ssa_def *tangent =
915       nir_fmul(b,
916                nir_fmul(b,
917                         nir_swizzle(b, dir, yx, 2),
918                         nir_imm_vec2(b, 1.0, -1.0)),
919                vp_scale_rcp);
920    tangent = nir_fmul(b, tangent, half_width);
921    tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
922    dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
923
924    nir_ssa_def *line_offets[8] = {
925       nir_fadd(b, tangent, nir_fneg(b, dir)),
926       nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
927       tangent,
928       nir_fneg(b, tangent),
929       tangent,
930       nir_fneg(b, tangent),
931       nir_fadd(b, tangent, dir),
932       nir_fadd(b, nir_fneg(b, tangent), dir),
933    };
934    nir_ssa_def *line_coord =
935       nir_vec4(b, half_width, half_width, half_length, half_length);
936    nir_ssa_def *line_coords[8] = {
937       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,  -1,  1)),
938       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,  -1,  1)),
939       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
940       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
941       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
942       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
943       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   1,  1)),
944       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   1,  1)),
945    };
946
947    /* emit first end-cap, and start line */
948    for (int i = 0; i < 4; ++i) {
949       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
950          gl_varying_slot location = var->data.location;
951          unsigned location_frac = var->data.location_frac;
952          if (state->prev_varyings[location][location_frac])
953             nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
954       }
955       nir_store_var(b, state->pos_out,
956                     nir_fadd(b, prev, nir_fmul(b, line_offets[i],
957                              nir_channel(b, prev, 3))), 0xf);
958       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
959       nir_emit_vertex(b);
960    }
961
962    /* finish line and emit last end-cap */
963    for (int i = 4; i < 8; ++i) {
964       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
965          gl_varying_slot location = var->data.location;
966          unsigned location_frac = var->data.location_frac;
967          if (state->varyings[location][location_frac])
968             nir_copy_var(b, var, state->varyings[location][location_frac]);
969       }
970       nir_store_var(b, state->pos_out,
971                     nir_fadd(b, curr, nir_fmul(b, line_offets[i],
972                              nir_channel(b, curr, 3))), 0xf);
973       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
974       nir_emit_vertex(b);
975    }
976    nir_end_primitive(b);
977
978    nir_pop_if(b, NULL);
979
980    nir_copy_var(b, state->prev_pos, state->pos_out);
981    nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
982       gl_varying_slot location = var->data.location;
983       unsigned location_frac = var->data.location_frac;
984       if (state->varyings[location][location_frac])
985          nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
986    }
987
988    // update prev_pos and pos_counter for next vertex
989    b->cursor = nir_after_instr(&intrin->instr);
990    nir_store_var(b, state->pos_counter,
991                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
992                                     1), 1);
993
994    nir_instr_remove(&intrin->instr);
995    return true;
996 }
997
998 static bool
999 lower_line_smooth_gs_end_primitive(nir_builder *b,
1000                                    nir_intrinsic_instr *intrin,
1001                                    struct lower_line_smooth_state *state)
1002 {
1003    b->cursor = nir_before_instr(&intrin->instr);
1004
1005    // reset line counter
1006    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
1007
1008    nir_instr_remove(&intrin->instr);
1009    return true;
1010 }
1011
1012 static bool
1013 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
1014 {
1015    if (instr->type != nir_instr_type_intrinsic)
1016       return false;
1017
1018    struct lower_line_smooth_state *state = data;
1019    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1020
1021    switch (intrin->intrinsic) {
1022    case nir_intrinsic_store_deref:
1023       return lower_line_smooth_gs_store(b, intrin, state);
1024    case nir_intrinsic_copy_deref:
1025       unreachable("should be lowered");
1026    case nir_intrinsic_emit_vertex_with_counter:
1027    case nir_intrinsic_emit_vertex:
1028       return lower_line_smooth_gs_emit_vertex(b, intrin, state);
1029    case nir_intrinsic_end_primitive:
1030    case nir_intrinsic_end_primitive_with_counter:
1031       return lower_line_smooth_gs_end_primitive(b, intrin, state);
1032    default:
1033       return false;
1034    }
1035 }
1036
1037 static bool
1038 lower_line_smooth_gs(nir_shader *shader)
1039 {
1040    nir_builder b;
1041    struct lower_line_smooth_state state;
1042
1043    memset(state.varyings, 0, sizeof(state.varyings));
1044    memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
1045    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1046       gl_varying_slot location = var->data.location;
1047       unsigned location_frac = var->data.location_frac;
1048       if (location == VARYING_SLOT_POS)
1049          continue;
1050
1051       char name[100];
1052       snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
1053       state.varyings[location][location_frac] =
1054          nir_variable_create(shader, nir_var_shader_temp,
1055                               var->type, name);
1056
1057       snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
1058       state.prev_varyings[location][location_frac] =
1059          nir_variable_create(shader, nir_var_shader_temp,
1060                               var->type, name);
1061    }
1062
1063    state.pos_out =
1064       nir_find_variable_with_location(shader, nir_var_shader_out,
1065                                       VARYING_SLOT_POS);
1066
1067    // if position isn't written, we have nothing to do
1068    if (!state.pos_out)
1069       return false;
1070
1071    state.line_coord_out =
1072       nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
1073                           "__line_coord");
1074    state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1075    state.line_coord_out->data.driver_location = shader->num_outputs++;
1076    state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
1077    shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
1078
1079    // create temp variables
1080    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
1081                                         glsl_vec4_type(),
1082                                         "__prev_pos");
1083    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
1084                                            glsl_uint_type(),
1085                                            "__pos_counter");
1086
1087    // initialize pos_counter
1088    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1089    nir_builder_init(&b, entry);
1090    b.cursor = nir_before_cf_list(&entry->body);
1091    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
1092
1093    shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
1094    shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
1095
1096    return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
1097                                        nir_metadata_dominance, &state);
1098 }
1099
1100 static bool
1101 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
1102 {
1103    int dummy;
1104    nir_builder b;
1105
1106    nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
1107    if (lower_stipple) {
1108       stipple_counter = nir_variable_create(shader, nir_var_shader_in,
1109                                             glsl_float_type(),
1110                                             "__stipple");
1111       stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1112       stipple_counter->data.driver_location = shader->num_inputs++;
1113       stipple_counter->data.location =
1114          MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
1115       shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
1116
1117       stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
1118                                             glsl_uint_type(),
1119                                             "stipple_pattern");
1120
1121       // initialize stipple_pattern
1122       nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1123       nir_builder_init(&b, entry);
1124       b.cursor = nir_before_cf_list(&entry->body);
1125       nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
1126                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
1127                                                    .base = 1);
1128       nir_store_var(&b, stipple_pattern, pattern, 1);
1129    }
1130
1131    nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
1132    return true;
1133 }
1134
1135 static bool
1136 lower_dual_blend(nir_shader *shader)
1137 {
1138    bool progress = false;
1139    nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1140    if (var) {
1141       var->data.location = FRAG_RESULT_DATA0;
1142       var->data.index = 1;
1143       progress = true;
1144    }
1145    nir_shader_preserve_all_metadata(shader);
1146    return progress;
1147 }
1148
1149 static bool
1150 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1151 {
1152    if (instr->type != nir_instr_type_alu)
1153       return false;
1154    nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1155    if (alu_instr->op != nir_op_pack_64_2x32 &&
1156        alu_instr->op != nir_op_unpack_64_2x32)
1157       return false;
1158    b->cursor = nir_before_instr(&alu_instr->instr);
1159    nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1160    nir_ssa_def *dest;
1161    switch (alu_instr->op) {
1162    case nir_op_pack_64_2x32:
1163       dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1164       break;
1165    case nir_op_unpack_64_2x32:
1166       dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1167       break;
1168    default:
1169       unreachable("Impossible opcode");
1170    }
1171    nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
1172    nir_instr_remove(&alu_instr->instr);
1173    return true;
1174 }
1175
1176 static bool
1177 lower_64bit_pack(nir_shader *shader)
1178 {
1179    return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1180                                        nir_metadata_block_index | nir_metadata_dominance, NULL);
1181 }
1182
1183 nir_shader *
1184 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1185                                const nir_shader *prev_stage,
1186                                int last_pv_vert_offset)
1187 {
1188    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1189                                                   options,
1190                                                   "filled quad gs");
1191
1192    nir_shader *nir = b.shader;
1193    nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
1194    nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
1195    nir->info.gs.vertices_in = 4;
1196    nir->info.gs.vertices_out = 6;
1197    nir->info.gs.invocations = 1;
1198    nir->info.gs.active_stream_mask = 1;
1199
1200    nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1201    memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1202    if (prev_stage->xfb_info) {
1203       nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
1204    }
1205
1206    nir_variable *in_vars[VARYING_SLOT_MAX];
1207    nir_variable *out_vars[VARYING_SLOT_MAX];
1208    unsigned num_vars = 0;
1209
1210    /* Create input/output variables. */
1211    nir_foreach_shader_out_variable(var, prev_stage) {
1212       assert(!var->data.patch);
1213
1214       char name[100];
1215       if (var->name)
1216          snprintf(name, sizeof(name), "in_%s", var->name);
1217       else
1218          snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
1219
1220       nir_variable *in = nir_variable_clone(var, nir);
1221       ralloc_free(in->name);
1222       in->name = ralloc_strdup(in, name);
1223       in->type = glsl_array_type(var->type, 4, false);
1224       in->data.mode = nir_var_shader_in;
1225       nir_shader_add_variable(nir, in);
1226
1227       if (var->name)
1228          snprintf(name, sizeof(name), "out_%s", var->name);
1229       else
1230          snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1231
1232       nir_variable *out = nir_variable_clone(var, nir);
1233       ralloc_free(out->name);
1234       out->name = ralloc_strdup(out, name);
1235       out->data.mode = nir_var_shader_out;
1236       nir_shader_add_variable(nir, out);
1237
1238       in_vars[num_vars] = in;
1239       out_vars[num_vars++] = out;
1240    }
1241
1242    int mapping_first[] = {0, 1, 2, 0, 2, 3};
1243    int mapping_last[] = {0, 1, 3, 1, 2, 3};
1244    nir_ssa_def *last_pv_vert_def = nir_load_ubo(&b, 1, 32,
1245                                                 nir_imm_int(&b, 0), nir_imm_int(&b, last_pv_vert_offset),
1246                                                 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
1247    last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1248    for (unsigned i = 0; i < 6; ++i) {
1249       /* swap indices 2 and 3 */
1250       nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
1251                                    nir_imm_int(&b, mapping_last[i]),
1252                                    nir_imm_int(&b, mapping_first[i]));
1253       /* Copy inputs to outputs. */
1254       for (unsigned j = 0; j < num_vars; ++j) {
1255          if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1256             continue;
1257          }
1258          nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
1259          copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1260       }
1261       nir_emit_vertex(&b, 0);
1262       if (i == 2)
1263         nir_end_primitive(&b, 0);
1264    }
1265
1266    nir_end_primitive(&b, 0);
1267    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1268    nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
1269    return nir;
1270 }
1271
1272 void
1273 zink_screen_init_compiler(struct zink_screen *screen)
1274 {
1275    static const struct nir_shader_compiler_options
1276    default_options = {
1277       .lower_ffma16 = true,
1278       .lower_ffma32 = true,
1279       .lower_ffma64 = true,
1280       .lower_scmp = true,
1281       .lower_fdph = true,
1282       .lower_flrp32 = true,
1283       .lower_fpow = true,
1284       .lower_fsat = true,
1285       .lower_extract_byte = true,
1286       .lower_extract_word = true,
1287       .lower_insert_byte = true,
1288       .lower_insert_word = true,
1289
1290       /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1291        * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1292        * ldexp, so we don't just always lower it in NIR).  Given that ldexp is
1293        * effectively unused (no instances in shader-db), it's not worth the
1294        * effort to do so.
1295        * */
1296       .lower_ldexp = true,
1297
1298       .lower_mul_high = true,
1299       .lower_rotate = true,
1300       .lower_uadd_carry = true,
1301       .lower_usub_borrow = true,
1302       .lower_uadd_sat = true,
1303       .lower_usub_sat = true,
1304       .lower_vector_cmp = true,
1305       .lower_int64_options = 0,
1306       .lower_doubles_options = 0,
1307       .lower_uniforms_to_ubo = true,
1308       .has_fsub = true,
1309       .has_isub = true,
1310       .has_txs = true,
1311       .lower_mul_2x32_64 = true,
1312       .support_16bit_alu = true, /* not quite what it sounds like */
1313       .max_unroll_iterations = 0,
1314    };
1315
1316    screen->nir_options = default_options;
1317
1318    if (!screen->info.feats.features.shaderInt64)
1319       screen->nir_options.lower_int64_options = ~0;
1320
1321    if (!screen->info.feats.features.shaderFloat64) {
1322       screen->nir_options.lower_doubles_options = ~0;
1323       screen->nir_options.lower_flrp64 = true;
1324       screen->nir_options.lower_ffma64 = true;
1325       /* soft fp64 function inlining will blow up loop bodies and effectively
1326        * stop Vulkan drivers from unrolling the loops.
1327        */
1328       screen->nir_options.max_unroll_iterations_fp64 = 32;
1329    }
1330
1331    /*
1332        The OpFRem and OpFMod instructions use cheap approximations of remainder,
1333        and the error can be large due to the discontinuity in trunc() and floor().
1334        This can produce mathematically unexpected results in some cases, such as
1335        FMod(x,x) computing x rather than 0, and can also cause the result to have
1336        a different sign than the infinitely precise result.
1337
1338        -Table 84. Precision of core SPIR-V Instructions
1339        * for drivers that are known to have imprecise fmod for doubles, lower dmod
1340     */
1341    if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
1342        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1343        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
1344       screen->nir_options.lower_doubles_options = nir_lower_dmod;
1345 }
1346
1347 const void *
1348 zink_get_compiler_options(struct pipe_screen *pscreen,
1349                           enum pipe_shader_ir ir,
1350                           gl_shader_stage shader)
1351 {
1352    assert(ir == PIPE_SHADER_IR_NIR);
1353    return &zink_screen(pscreen)->nir_options;
1354 }
1355
1356 struct nir_shader *
1357 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1358 {
1359    if (zink_debug & ZINK_DEBUG_TGSI) {
1360       fprintf(stderr, "TGSI shader:\n---8<---\n");
1361       tgsi_dump_to_file(tokens, 0, stderr);
1362       fprintf(stderr, "---8<---\n\n");
1363    }
1364
1365    return tgsi_to_nir(tokens, screen, false);
1366 }
1367
1368
1369 static bool
1370 dest_is_64bit(nir_dest *dest, void *state)
1371 {
1372    bool *lower = (bool *)state;
1373    if (dest && (nir_dest_bit_size(*dest) == 64)) {
1374       *lower = true;
1375       return false;
1376    }
1377    return true;
1378 }
1379
1380 static bool
1381 src_is_64bit(nir_src *src, void *state)
1382 {
1383    bool *lower = (bool *)state;
1384    if (src && (nir_src_bit_size(*src) == 64)) {
1385       *lower = true;
1386       return false;
1387    }
1388    return true;
1389 }
1390
1391 static bool
1392 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1393 {
1394    bool lower = false;
1395    /* lower_alu_to_scalar required nir_instr to be const, but nir_foreach_*
1396     * doesn't have const variants, so do the ugly const_cast here. */
1397    nir_instr *instr = (nir_instr *)const_instr;
1398
1399    nir_foreach_dest(instr, dest_is_64bit, &lower);
1400    if (lower)
1401       return true;
1402    nir_foreach_src(instr, src_is_64bit, &lower);
1403    return lower;
1404 }
1405
1406 static bool
1407 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1408 {
1409    nir_instr *instr = (nir_instr *)const_instr;
1410    nir_alu_instr *alu = nir_instr_as_alu(instr);
1411    switch (alu->op) {
1412    case nir_op_pack_64_2x32_split:
1413    case nir_op_pack_32_2x16_split:
1414    case nir_op_unpack_32_2x16_split_x:
1415    case nir_op_unpack_32_2x16_split_y:
1416    case nir_op_unpack_64_2x32_split_x:
1417    case nir_op_unpack_64_2x32_split_y:
1418       return true;
1419    default:
1420       break;
1421    }
1422    return false;
1423 }
1424
1425
1426 struct bo_vars {
1427    nir_variable *uniforms[5];
1428    nir_variable *ubo[5];
1429    nir_variable *ssbo[5];
1430    uint32_t first_ubo;
1431    uint32_t first_ssbo;
1432 };
1433
1434 static struct bo_vars
1435 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1436 {
1437    struct bo_vars bo;
1438    memset(&bo, 0, sizeof(bo));
1439    if (zs->ubos_used)
1440       bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1441    assert(bo.first_ssbo < PIPE_MAX_CONSTANT_BUFFERS);
1442    if (zs->ssbos_used)
1443       bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1444    assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1445    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1446       unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1447       if (var->data.mode == nir_var_mem_ssbo) {
1448          assert(!bo.ssbo[idx]);
1449          bo.ssbo[idx] = var;
1450       } else {
1451          if (var->data.driver_location) {
1452             assert(!bo.ubo[idx]);
1453             bo.ubo[idx] = var;
1454          } else {
1455             assert(!bo.uniforms[idx]);
1456             bo.uniforms[idx] = var;
1457          }
1458       }
1459    }
1460    return bo;
1461 }
1462
1463 static bool
1464 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1465 {
1466    struct bo_vars *bo = data;
1467    if (instr->type != nir_instr_type_intrinsic)
1468       return false;
1469    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1470    nir_variable *var = NULL;
1471    nir_ssa_def *offset = NULL;
1472    bool is_load = true;
1473    b->cursor = nir_before_instr(instr);
1474
1475    switch (intr->intrinsic) {
1476    case nir_intrinsic_store_ssbo:
1477       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1478       offset = intr->src[2].ssa;
1479       is_load = false;
1480       break;
1481    case nir_intrinsic_load_ssbo:
1482       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1483       offset = intr->src[1].ssa;
1484       break;
1485    case nir_intrinsic_load_ubo:
1486       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1487          var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
1488       else
1489          var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
1490       offset = intr->src[1].ssa;
1491       break;
1492    default:
1493       return false;
1494    }
1495    nir_src offset_src = nir_src_for_ssa(offset);
1496    if (!nir_src_is_const(offset_src))
1497       return false;
1498
1499    unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1500    const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1501    unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1502    bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1503    if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1504       return false;
1505
1506    unsigned rewrites = 0;
1507    nir_ssa_def *result[2];
1508    for (unsigned i = 0; i < intr->num_components; i++) {
1509       if (offset_bytes + i >= size) {
1510          rewrites++;
1511          if (is_load)
1512             result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
1513       }
1514    }
1515    assert(rewrites == intr->num_components);
1516    if (is_load) {
1517       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1518       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1519    }
1520    nir_instr_remove(instr);
1521    return true;
1522 }
1523
1524 static bool
1525 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1526 {
1527    struct bo_vars bo = get_bo_vars(zs, shader);
1528    return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1529 }
1530
1531 static void
1532 optimize_nir(struct nir_shader *s, struct zink_shader *zs)
1533 {
1534    bool progress;
1535    do {
1536       progress = false;
1537       if (s->options->lower_int64_options)
1538          NIR_PASS_V(s, nir_lower_int64);
1539       if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1540          NIR_PASS_V(s, lower_64bit_pack);
1541       NIR_PASS_V(s, nir_lower_vars_to_ssa);
1542       NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1543       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1544       NIR_PASS(progress, s, nir_copy_prop);
1545       NIR_PASS(progress, s, nir_opt_remove_phis);
1546       if (s->options->lower_int64_options) {
1547          NIR_PASS(progress, s, nir_lower_64bit_phis);
1548          NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1549       }
1550       NIR_PASS(progress, s, nir_opt_dce);
1551       NIR_PASS(progress, s, nir_opt_dead_cf);
1552       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1553       NIR_PASS(progress, s, nir_opt_cse);
1554       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1555       NIR_PASS(progress, s, nir_opt_algebraic);
1556       NIR_PASS(progress, s, nir_opt_constant_folding);
1557       NIR_PASS(progress, s, nir_opt_undef);
1558       NIR_PASS(progress, s, zink_nir_lower_b2b);
1559       if (zs)
1560          NIR_PASS(progress, s, bound_bo_access, zs);
1561    } while (progress);
1562
1563    do {
1564       progress = false;
1565       NIR_PASS(progress, s, nir_opt_algebraic_late);
1566       if (progress) {
1567          NIR_PASS_V(s, nir_copy_prop);
1568          NIR_PASS_V(s, nir_opt_dce);
1569          NIR_PASS_V(s, nir_opt_cse);
1570       }
1571    } while (progress);
1572 }
1573
1574 /* - copy the lowered fbfetch variable
1575  * - set the new one up as an input attachment for descriptor 0.6
1576  * - load it as an image
1577  * - overwrite the previous load
1578  */
1579 static bool
1580 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1581 {
1582    bool ms = data != NULL;
1583    if (instr->type != nir_instr_type_intrinsic)
1584       return false;
1585    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1586    if (intr->intrinsic != nir_intrinsic_load_deref)
1587       return false;
1588    nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
1589    if (!var->data.fb_fetch_output)
1590       return false;
1591    b->cursor = nir_after_instr(instr);
1592    nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1593    /* If Dim is SubpassData, ... Image Format must be Unknown
1594     * - SPIRV OpTypeImage specification
1595     */
1596    fbfetch->data.image.format = 0;
1597    fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1598    fbfetch->data.mode = nir_var_uniform;
1599    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1600    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1601    fbfetch->data.sample = ms;
1602    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1603    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1604    nir_shader_add_variable(b->shader, fbfetch);
1605    nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
1606    nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
1607    nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1608    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1609    return true;
1610 }
1611
1612 static bool
1613 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1614 {
1615    nir_foreach_shader_out_variable(var, shader) {
1616       if (var->data.fb_fetch_output) {
1617          *fbfetch = var;
1618          break;
1619       }
1620    }
1621    assert(*fbfetch);
1622    if (!*fbfetch)
1623       return false;
1624    return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1625 }
1626
1627 /*
1628  * Add a check for out of bounds LOD for every texel fetch op
1629  * It boils down to:
1630  * - if (lod < query_levels(tex))
1631  * -    res = txf(tex)
1632  * - else
1633  * -    res = (0, 0, 0, 1)
1634  */
1635 static bool
1636 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1637 {
1638    if (in->type != nir_instr_type_tex)
1639       return false;
1640    nir_tex_instr *txf = nir_instr_as_tex(in);
1641    if (txf->op != nir_texop_txf)
1642       return false;
1643
1644    b->cursor = nir_before_instr(in);
1645    int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1646    assert(lod_idx >= 0);
1647    nir_src lod_src = txf->src[lod_idx].src;
1648    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1649       return false;
1650
1651    assert(lod_src.is_ssa);
1652    nir_ssa_def *lod = lod_src.ssa;
1653
1654    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1655    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1656    nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1657                                                 !!(offset_idx >= 0) + !!(handle_idx >= 0));
1658    levels->op = nir_texop_query_levels;
1659    levels->texture_index = txf->texture_index;
1660    levels->dest_type = nir_type_int | lod->bit_size;
1661    if (offset_idx >= 0) {
1662       levels->src[0].src_type = nir_tex_src_texture_offset;
1663       nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
1664    }
1665    if (handle_idx >= 0) {
1666       levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
1667       nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
1668    }
1669    nir_ssa_dest_init(&levels->instr, &levels->dest,
1670                      nir_tex_instr_dest_size(levels), 32, NULL);
1671    nir_builder_instr_insert(b, &levels->instr);
1672
1673    nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
1674    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1675    nir_builder_instr_insert(b, &new_txf->instr);
1676
1677    nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1678    nir_const_value oob_values[4] = {0};
1679    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1680    oob_values[3] = (txf->dest_type & nir_type_float) ?
1681                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1682    nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1683
1684    nir_pop_if(b, lod_oob_else);
1685    nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);
1686
1687    nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
1688    nir_instr_remove_v(in);
1689    return true;
1690 }
1691
1692 /* This pass is used to workaround the lack of out of bounds LOD robustness
1693  * for texel fetch ops in VK_EXT_image_robustness.
1694  */
1695 static bool
1696 lower_txf_lod_robustness(nir_shader *shader)
1697 {
1698    return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1699 }
1700
1701 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1702 static bool
1703 check_psiz(struct nir_shader *s)
1704 {
1705    bool have_psiz = false;
1706    nir_foreach_shader_out_variable(var, s) {
1707       if (var->data.location == VARYING_SLOT_PSIZ) {
1708          /* genuine PSIZ outputs will have this set */
1709          have_psiz |= !!var->data.explicit_location;
1710       }
1711    }
1712    return have_psiz;
1713 }
1714
1715 static nir_variable *
1716 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
1717 {
1718    assert((int)location >= 0);
1719
1720    unsigned found = 0;
1721    if (!location_frac && location != VARYING_SLOT_PSIZ) {
1722       nir_foreach_shader_out_variable(var, nir) {
1723          if (var->data.location == location)
1724             found++;
1725       }
1726    }
1727    if (found) {
1728       /* multiple variables found for this location: find the biggest one */
1729       nir_variable *out = NULL;
1730       unsigned slots = 0;
1731       nir_foreach_shader_out_variable(var, nir) {
1732          if (var->data.location == location) {
1733             unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
1734             if (count_slots > slots) {
1735                slots = count_slots;
1736                out = var;
1737             }
1738          }
1739       }
1740       return out;
1741    } else {
1742       /* only one variable found or this is location_frac */
1743       nir_foreach_shader_out_variable(var, nir) {
1744          if (var->data.location == location &&
1745              (var->data.location_frac == location_frac ||
1746               (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
1747             if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
1748                return var;
1749          }
1750       }
1751    }
1752    return NULL;
1753 }
1754
1755 static bool
1756 is_inlined(const bool *inlined, const struct pipe_stream_output *output)
1757 {
1758    for (unsigned i = 0; i < output->num_components; i++)
1759       if (!inlined[output->start_component + i])
1760          return false;
1761    return true;
1762 }
1763
1764 static void
1765 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1766 {
1767    uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1768    if (last_output < VARYING_SLOT_VAR0)
1769       last_output = VARYING_SLOT_VAR0;
1770    else
1771       last_output++;
1772    /* this should get fixed up by slot remapping */
1773    psiz->data.location = last_output;
1774 }
1775
1776 static const struct glsl_type *
1777 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1778 {
1779    /* could be dvec/dmat/mat: each member is the same */
1780    const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1781    /* determine size of each member type */
1782    unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1783    /* normalize slot idx to current type's size */
1784    slot %= slot_count;
1785    unsigned slot_components = glsl_get_components(plain);
1786    if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1787       slot_components *= 2;
1788    /* create a vec4 mask of the selected slot's components out of all the components */
1789    uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1790    /* return a vecN of the selected components */
1791    slot_components = util_bitcount(mask);
1792    return glsl_vec_type(slot_components);
1793 }
1794
1795 static const struct glsl_type *
1796 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1797 {
1798    const struct glsl_type *type = slot_type;
1799    unsigned slot_count = 0;
1800    unsigned cur_slot = 0;
1801    /* iterate over all the members in the struct, stopping once the slot idx is reached */
1802    for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1803       /* use array type for slot counting but return array member type for unroll */
1804       const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1805       type = glsl_without_array(arraytype);
1806       slot_count = glsl_count_vec4_slots(arraytype, false, false);
1807    }
1808    *slot_idx -= (cur_slot - slot_count);
1809    if (!glsl_type_is_struct_or_ifc(type))
1810       /* this is a fully unrolled struct: find the number of vec components to output */
1811       type = clamp_slot_type(type, *slot_idx);
1812    return type;
1813 }
1814
1815 static unsigned
1816 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1817 {
1818    assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1819    const struct glsl_type *orig_type = var->type;
1820    const struct glsl_type *type = glsl_without_array(var->type);
1821    unsigned slot_idx = slot - so_slot;
1822    if (type != orig_type)
1823       slot_idx %= glsl_count_vec4_slots(type, false, false);
1824    /* need to find the vec4 that's being exported by this slot */
1825    while (glsl_type_is_struct_or_ifc(type))
1826       type = unroll_struct_type(type, &slot_idx);
1827
1828    /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1829    unsigned num_components = glsl_get_components(glsl_without_array(type));
1830    /* special handling: clip/cull distance are arrays with vector semantics */
1831    if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1832       num_components = glsl_array_size(type);
1833       if (slot_idx)
1834          /* this is the second vec4 */
1835          num_components %= 4;
1836       else
1837          /* this is the first vec4 */
1838          num_components = MIN2(num_components, 4);
1839    }
1840    assert(num_components);
1841    /* gallium handles xfb in terms of 32bit units */
1842    if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1843       num_components *= 2;
1844    return num_components;
1845 }
1846
1847 static const struct pipe_stream_output *
1848 find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
1849 {
1850    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1851       const struct pipe_stream_output *packed_output = &so_info->output[i];
1852       if (reverse_map[packed_output->register_index] == slot)
1853          return packed_output;
1854    }
1855    return NULL;
1856 }
1857
1858 static void
1859 update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
1860                uint64_t outputs_written, bool have_psiz)
1861 {
1862    uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
1863    unsigned slot = 0;
1864    /* semi-copied from iris */
1865    while (outputs_written) {
1866       int bit = u_bit_scan64(&outputs_written);
1867       /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
1868       if (bit == VARYING_SLOT_PSIZ && !have_psiz)
1869          continue;
1870       reverse_map[slot++] = bit;
1871    }
1872
1873    bool have_fake_psiz = false;
1874    nir_foreach_shader_out_variable(var, nir) {
1875       if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
1876          have_fake_psiz = true;
1877    }
1878
1879    bool inlined[VARYING_SLOT_MAX][4] = {0};
1880    uint64_t packed = 0;
1881    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1882    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1883    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1884    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1885    nir_variable *psiz = NULL;
1886    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1887       const struct pipe_stream_output *output = &so_info->output[i];
1888       unsigned slot = reverse_map[output->register_index];
1889       /* always set stride to be used during draw */
1890       zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
1891       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1892          nir_variable *var = NULL;
1893          unsigned so_slot;
1894          while (!var)
1895             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1896          if (var->data.location == VARYING_SLOT_PSIZ)
1897             psiz = var;
1898          so_slot = slot + 1;
1899          slot = reverse_map[output->register_index];
1900          if (var->data.explicit_xfb_buffer) {
1901             /* handle dvec3 where gallium splits streamout over 2 registers */
1902             for (unsigned j = 0; j < output->num_components; j++)
1903                inlined[slot][output->start_component + j] = true;
1904          }
1905          if (is_inlined(inlined[slot], output))
1906             continue;
1907          bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
1908          unsigned num_components = get_slot_components(var, slot, so_slot);
1909          /* if this is the entire variable, try to blast it out during the initial declaration
1910           * structs must be handled later to ensure accurate analysis
1911           */
1912          if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
1913             var->data.explicit_xfb_buffer = 1;
1914             var->data.xfb.buffer = output->output_buffer;
1915             var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1916             var->data.offset = output->dst_offset * 4;
1917             var->data.stream = output->stream;
1918             for (unsigned j = 0; j < output->num_components; j++)
1919                inlined[slot][output->start_component + j] = true;
1920          } else {
1921             /* otherwise store some metadata for later */
1922             packed |= BITFIELD64_BIT(slot);
1923             packed_components[slot] += output->num_components;
1924             packed_streams[slot] |= BITFIELD_BIT(output->stream);
1925             packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
1926             for (unsigned j = 0; j < output->num_components; j++)
1927                packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
1928          }
1929       }
1930    }
1931
1932    /* if this was flagged as a packed output before, and if all the components are
1933     * being output with the same stream on the same buffer with increasing offsets, this entire variable
1934     * can be consolidated into a single output to conserve locations
1935     */
1936    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1937       const struct pipe_stream_output *output = &so_info->output[i];
1938       unsigned slot = reverse_map[output->register_index];
1939       if (is_inlined(inlined[slot], output))
1940          continue;
1941       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1942          nir_variable *var = NULL;
1943          while (!var)
1944             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1945          /* this is a lowered 64bit variable that can't be exported due to packing */
1946          if (var->data.is_xfb)
1947             goto out;
1948
1949          unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
1950          /* for each variable, iterate over all the variable's slots and inline the outputs */
1951          for (unsigned j = 0; j < num_slots; j++) {
1952             slot = var->data.location + j;
1953             const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
1954             if (!packed_output)
1955                goto out;
1956
1957             /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1958             if (!(packed & BITFIELD64_BIT(slot)) ||
1959                 util_bitcount(packed_streams[slot]) != 1 ||
1960                 util_bitcount(packed_buffers[slot]) != 1)
1961                goto out;
1962
1963             /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1964             unsigned num_components = get_slot_components(var, slot, var->data.location);
1965             if (num_components != packed_components[slot])
1966                goto out;
1967
1968             /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
1969             uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
1970             for (unsigned k = 1; k < num_components; k++) {
1971                /* if the offsets are not incrementing as expected, skip consolidation */
1972                if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
1973                   goto out;
1974                prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
1975             }
1976          }
1977          /* this output can be consolidated: blast out all the data inlined */
1978          var->data.explicit_xfb_buffer = 1;
1979          var->data.xfb.buffer = output->output_buffer;
1980          var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1981          var->data.offset = output->dst_offset * 4;
1982          var->data.stream = output->stream;
1983          /* GLSL specifies that interface blocks are split per-buffer in XFB */
1984          if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
1985             zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
1986          /* mark all slot components inlined to skip subsequent loop iterations */
1987          for (unsigned j = 0; j < num_slots; j++) {
1988             slot = var->data.location + j;
1989             for (unsigned k = 0; k < packed_components[slot]; k++)
1990                inlined[slot][k] = true;
1991             packed &= ~BITFIELD64_BIT(slot);
1992          }
1993          continue;
1994       }
1995 out:
1996       /* these are packed/explicit varyings which can't be exported with normal output */
1997       zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
1998       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1999       zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
2000    }
2001    zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
2002    /* ensure this doesn't get output in the shader by unsetting location */
2003    if (have_fake_psiz && psiz)
2004       update_psiz_location(nir, psiz);
2005 }
2006
2007 struct decompose_state {
2008   nir_variable **split;
2009   bool needs_w;
2010 };
2011
2012 static bool
2013 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2014 {
2015    struct decompose_state *state = data;
2016    nir_variable **split = state->split;
2017    if (instr->type != nir_instr_type_intrinsic)
2018       return false;
2019    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2020    if (intr->intrinsic != nir_intrinsic_load_deref)
2021       return false;
2022    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2023    nir_variable *var = nir_deref_instr_get_variable(deref);
2024    if (var != split[0])
2025       return false;
2026    unsigned num_components = glsl_get_vector_elements(split[0]->type);
2027    b->cursor = nir_after_instr(instr);
2028    nir_ssa_def *loads[4];
2029    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2030       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2031    if (state->needs_w) {
2032       /* oob load w comopnent to get correct value for int/float */
2033       loads[3] = nir_channel(b, loads[0], 3);
2034       loads[0] = nir_channel(b, loads[0], 0);
2035    }
2036    nir_ssa_def *new_load = nir_vec(b, loads, num_components);
2037    nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
2038    nir_instr_remove_v(instr);
2039    return true;
2040 }
2041
2042 static bool
2043 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2044 {
2045    uint32_t bits = 0;
2046    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2047       bits |= BITFIELD_BIT(var->data.driver_location);
2048    bits = ~bits;
2049    u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2050       nir_variable *split[5];
2051       struct decompose_state state;
2052       state.split = split;
2053       nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2054       assert(var);
2055       split[0] = var;
2056       bits |= BITFIELD_BIT(var->data.driver_location);
2057       const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2058       unsigned num_components = glsl_get_vector_elements(var->type);
2059       state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2060       for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2061          split[i+1] = nir_variable_clone(var, nir);
2062          split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2063          if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2064             split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2065          else
2066             split[i+1]->type = new_type;
2067          split[i+1]->data.driver_location = ffs(bits) - 1;
2068          bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2069          nir_shader_add_variable(nir, split[i+1]);
2070       }
2071       var->data.mode = nir_var_shader_temp;
2072       nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2073    }
2074    nir_fixup_deref_modes(nir);
2075    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2076    optimize_nir(nir, NULL);
2077    return true;
2078 }
2079
2080 static bool
2081 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2082 {
2083    struct zink_screen *screen = data;
2084    const bool has_int64 = screen->info.feats.features.shaderInt64;
2085    if (instr->type != nir_instr_type_intrinsic)
2086       return false;
2087    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2088    b->cursor = nir_before_instr(instr);
2089    switch (intr->intrinsic) {
2090    case nir_intrinsic_ssbo_atomic_fadd:
2091    case nir_intrinsic_ssbo_atomic_add:
2092    case nir_intrinsic_ssbo_atomic_umin:
2093    case nir_intrinsic_ssbo_atomic_imin:
2094    case nir_intrinsic_ssbo_atomic_umax:
2095    case nir_intrinsic_ssbo_atomic_imax:
2096    case nir_intrinsic_ssbo_atomic_and:
2097    case nir_intrinsic_ssbo_atomic_or:
2098    case nir_intrinsic_ssbo_atomic_xor:
2099    case nir_intrinsic_ssbo_atomic_exchange:
2100    case nir_intrinsic_ssbo_atomic_comp_swap: {
2101       /* convert offset to uintN_t[idx] */
2102       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
2103       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2104       return true;
2105    }
2106    case nir_intrinsic_load_ssbo:
2107    case nir_intrinsic_load_ubo: {
2108       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2109       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2110                         nir_src_is_const(intr->src[0]) &&
2111                         nir_src_as_uint(intr->src[0]) == 0 &&
2112                         nir_dest_bit_size(intr->dest) == 64 &&
2113                         nir_intrinsic_align_offset(intr) % 8 != 0;
2114       force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2115       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2116       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2117       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2118       if (force_2x32) {
2119          /* this is always scalarized */
2120          assert(intr->dest.ssa.num_components == 1);
2121          /* rewrite as 2x32 */
2122          nir_ssa_def *load[2];
2123          for (unsigned i = 0; i < 2; i++) {
2124             if (intr->intrinsic == nir_intrinsic_load_ssbo)
2125                load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2126             else
2127                load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2128             nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2129          }
2130          /* cast back to 64bit */
2131          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2132          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2133          nir_instr_remove(instr);
2134       }
2135       return true;
2136    }
2137    case nir_intrinsic_load_shared:
2138       b->cursor = nir_before_instr(instr);
2139       bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2140       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2141       nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
2142       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2143       if (force_2x32) {
2144          /* this is always scalarized */
2145          assert(intr->dest.ssa.num_components == 1);
2146          /* rewrite as 2x32 */
2147          nir_ssa_def *load[2];
2148          for (unsigned i = 0; i < 2; i++)
2149             load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2150          /* cast back to 64bit */
2151          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2152          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2153          nir_instr_remove(instr);
2154          return true;
2155       }
2156       break;
2157    case nir_intrinsic_store_ssbo: {
2158       b->cursor = nir_before_instr(instr);
2159       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2160       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2161       nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
2162       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2163       if (force_2x32) {
2164          /* this is always scalarized */
2165          assert(intr->src[0].ssa->num_components == 1);
2166          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2167          for (unsigned i = 0; i < 2; i++)
2168             nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2169          nir_instr_remove(instr);
2170       }
2171       return true;
2172    }
2173    case nir_intrinsic_store_shared: {
2174       b->cursor = nir_before_instr(instr);
2175       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2176       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2177       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2178       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2179       if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2180          /* this is always scalarized */
2181          assert(intr->src[0].ssa->num_components == 1);
2182          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2183          for (unsigned i = 0; i < 2; i++)
2184             nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2185          nir_instr_remove(instr);
2186       }
2187       return true;
2188    }
2189    default:
2190       break;
2191    }
2192    return false;
2193 }
2194
2195 static bool
2196 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2197 {
2198    return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2199 }
2200
2201 static nir_variable *
2202 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2203 {
2204    nir_variable *var, **ptr;
2205    unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2206
2207    if (ssbo)
2208       ptr = &bo->ssbo[bit_size >> 4];
2209    else {
2210       if (!idx) {
2211          ptr = &bo->uniforms[bit_size >> 4];
2212       } else
2213          ptr = &bo->ubo[bit_size >> 4];
2214    }
2215    var = *ptr;
2216    if (!var) {
2217       if (ssbo)
2218          var = bo->ssbo[32 >> 4];
2219       else {
2220          if (!idx)
2221             var = bo->uniforms[32 >> 4];
2222          else
2223             var = bo->ubo[32 >> 4];
2224       }
2225       var = nir_variable_clone(var, shader);
2226       if (ssbo)
2227          var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2228       else
2229          var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2230       *ptr = var;
2231       nir_shader_add_variable(shader, var);
2232
2233       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2234       fields[0].name = ralloc_strdup(shader, "base");
2235       fields[1].name = ralloc_strdup(shader, "unsized");
2236       unsigned array_size = glsl_get_length(var->type);
2237       const struct glsl_type *bare_type = glsl_without_array(var->type);
2238       const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2239       unsigned length = glsl_get_length(array_type);
2240       const struct glsl_type *type;
2241       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2242       if (bit_size > 32) {
2243          assert(bit_size == 64);
2244          type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2245       } else {
2246          type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2247       }
2248       fields[0].type = type;
2249       fields[1].type = unsized;
2250       var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2251       var->data.driver_location = idx;
2252    }
2253    return var;
2254 }
2255
2256 static void
2257 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2258 {
2259    nir_intrinsic_op op;
2260    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2261    switch (intr->intrinsic) {
2262    case nir_intrinsic_ssbo_atomic_fadd:
2263       op = nir_intrinsic_deref_atomic_fadd;
2264       break;
2265    case nir_intrinsic_ssbo_atomic_fmin:
2266       op = nir_intrinsic_deref_atomic_fmin;
2267       break;
2268    case nir_intrinsic_ssbo_atomic_fmax:
2269       op = nir_intrinsic_deref_atomic_fmax;
2270       break;
2271    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2272       op = nir_intrinsic_deref_atomic_fcomp_swap;
2273       break;
2274    case nir_intrinsic_ssbo_atomic_add:
2275       op = nir_intrinsic_deref_atomic_add;
2276       break;
2277    case nir_intrinsic_ssbo_atomic_umin:
2278       op = nir_intrinsic_deref_atomic_umin;
2279       break;
2280    case nir_intrinsic_ssbo_atomic_imin:
2281       op = nir_intrinsic_deref_atomic_imin;
2282       break;
2283    case nir_intrinsic_ssbo_atomic_umax:
2284       op = nir_intrinsic_deref_atomic_umax;
2285       break;
2286    case nir_intrinsic_ssbo_atomic_imax:
2287       op = nir_intrinsic_deref_atomic_imax;
2288       break;
2289    case nir_intrinsic_ssbo_atomic_and:
2290       op = nir_intrinsic_deref_atomic_and;
2291       break;
2292    case nir_intrinsic_ssbo_atomic_or:
2293       op = nir_intrinsic_deref_atomic_or;
2294       break;
2295    case nir_intrinsic_ssbo_atomic_xor:
2296       op = nir_intrinsic_deref_atomic_xor;
2297       break;
2298    case nir_intrinsic_ssbo_atomic_exchange:
2299       op = nir_intrinsic_deref_atomic_exchange;
2300       break;
2301    case nir_intrinsic_ssbo_atomic_comp_swap:
2302       op = nir_intrinsic_deref_atomic_comp_swap;
2303       break;
2304    default:
2305       unreachable("unknown intrinsic");
2306    }
2307    nir_ssa_def *offset = intr->src[1].ssa;
2308    nir_src *src = &intr->src[0];
2309    nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2310    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2311    nir_ssa_def *idx = src->ssa;
2312    if (bo->first_ssbo)
2313       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2314    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2315    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2316
2317    /* generate new atomic deref ops for every component */
2318    nir_ssa_def *result[4];
2319    unsigned num_components = nir_dest_num_components(intr->dest);
2320    for (unsigned i = 0; i < num_components; i++) {
2321       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2322       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2323       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
2324       new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2325       /* deref ops have no offset src, so copy the srcs after it */
2326       for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
2327          nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
2328       nir_builder_instr_insert(b, &new_instr->instr);
2329
2330       result[i] = &new_instr->dest.ssa;
2331       offset = nir_iadd_imm(b, offset, 1);
2332    }
2333
2334    nir_ssa_def *load = nir_vec(b, result, num_components);
2335    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2336    nir_instr_remove(instr);
2337 }
2338
2339 static bool
2340 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2341 {
2342    struct bo_vars *bo = data;
2343    if (instr->type != nir_instr_type_intrinsic)
2344       return false;
2345    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2346    nir_variable *var = NULL;
2347    nir_ssa_def *offset = NULL;
2348    bool is_load = true;
2349    b->cursor = nir_before_instr(instr);
2350    nir_src *src;
2351    bool ssbo = true;
2352    switch (intr->intrinsic) {
2353    case nir_intrinsic_ssbo_atomic_fadd:
2354    case nir_intrinsic_ssbo_atomic_fmin:
2355    case nir_intrinsic_ssbo_atomic_fmax:
2356    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2357    case nir_intrinsic_ssbo_atomic_add:
2358    case nir_intrinsic_ssbo_atomic_umin:
2359    case nir_intrinsic_ssbo_atomic_imin:
2360    case nir_intrinsic_ssbo_atomic_umax:
2361    case nir_intrinsic_ssbo_atomic_imax:
2362    case nir_intrinsic_ssbo_atomic_and:
2363    case nir_intrinsic_ssbo_atomic_or:
2364    case nir_intrinsic_ssbo_atomic_xor:
2365    case nir_intrinsic_ssbo_atomic_exchange:
2366    case nir_intrinsic_ssbo_atomic_comp_swap:
2367       rewrite_atomic_ssbo_instr(b, instr, bo);
2368       return true;
2369    case nir_intrinsic_store_ssbo:
2370       src = &intr->src[1];
2371       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2372       offset = intr->src[2].ssa;
2373       is_load = false;
2374       break;
2375    case nir_intrinsic_load_ssbo:
2376       src = &intr->src[0];
2377       var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2378       offset = intr->src[1].ssa;
2379       break;
2380    case nir_intrinsic_load_ubo:
2381       src = &intr->src[0];
2382       var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
2383       offset = intr->src[1].ssa;
2384       ssbo = false;
2385       break;
2386    default:
2387       return false;
2388    }
2389    assert(var);
2390    assert(offset);
2391    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2392    nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2393    if (!ssbo && bo->first_ubo && var->data.driver_location)
2394       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2395    else if (ssbo && bo->first_ssbo)
2396       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2397    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
2398    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2399    assert(intr->num_components <= 2);
2400    if (is_load) {
2401       nir_ssa_def *result[2];
2402       for (unsigned i = 0; i < intr->num_components; i++) {
2403          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2404          result[i] = nir_load_deref(b, deref_arr);
2405          if (intr->intrinsic == nir_intrinsic_load_ssbo)
2406             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2407          offset = nir_iadd_imm(b, offset, 1);
2408       }
2409       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
2410       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2411    } else {
2412       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2413       nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2414    }
2415    nir_instr_remove(instr);
2416    return true;
2417 }
2418
2419 static bool
2420 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2421 {
2422    struct bo_vars bo = get_bo_vars(zs, shader);
2423    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2424 }
2425
2426 static bool
2427 find_var_deref(nir_shader *nir, nir_variable *var)
2428 {
2429    nir_foreach_function(function, nir) {
2430       if (!function->impl)
2431          continue;
2432
2433       nir_foreach_block(block, function->impl) {
2434          nir_foreach_instr(instr, block) {
2435             if (instr->type != nir_instr_type_deref)
2436                continue;
2437             nir_deref_instr *deref = nir_instr_as_deref(instr);
2438             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2439                return true;
2440          }
2441       }
2442    }
2443    return false;
2444 }
2445
2446 struct clamp_layer_output_state {
2447    nir_variable *original;
2448    nir_variable *clamped;
2449 };
2450
2451 static void
2452 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2453 {
2454    nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
2455                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
2456                                                     .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
2457    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2458    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2459    nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2460                                   nir_load_deref(b, original_deref),
2461                                   nir_imm_int(b, 0));
2462    nir_store_deref(b, clamped_deref, layer, 0);
2463 }
2464
2465 static bool
2466 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2467 {
2468    struct clamp_layer_output_state *state = data;
2469    switch (instr->type) {
2470    case nir_instr_type_intrinsic: {
2471       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2472       if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2473           intr->intrinsic != nir_intrinsic_emit_vertex)
2474          return false;
2475       b->cursor = nir_before_instr(instr);
2476       clamp_layer_output_emit(b, state);
2477       return true;
2478    }
2479    default: return false;
2480    }
2481 }
2482
2483 static bool
2484 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2485 {
2486    switch (vs->info.stage) {
2487    case MESA_SHADER_VERTEX:
2488    case MESA_SHADER_GEOMETRY:
2489    case MESA_SHADER_TESS_EVAL:
2490       break;
2491    default:
2492       unreachable("invalid last vertex stage!");
2493    }
2494    struct clamp_layer_output_state state = {0};
2495    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2496    if (!state.original || !find_var_deref(vs, state.original))
2497       return false;
2498    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2499    state.clamped->data.location = VARYING_SLOT_LAYER;
2500    nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2501    if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2502       state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2503       state.original->data.driver_location = (*next_location)++;
2504       if (fs_var) {
2505          fs_var->data.location = state.original->data.location;
2506          fs_var->data.driver_location = state.original->data.driver_location;
2507       }
2508    } else {
2509       if (state.original->data.explicit_xfb_buffer) {
2510          /* Will xfb the clamped output but still better than nothing */
2511          state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2512          state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2513          state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2514          state.clamped->data.offset = state.original->data.offset;
2515          state.clamped->data.stream = state.original->data.stream;
2516       }
2517       state.original->data.mode = nir_var_shader_temp;
2518       nir_fixup_deref_modes(vs);
2519    }
2520    if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2521       nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2522    } else {
2523       nir_builder b;
2524       nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2525       nir_builder_init(&b, impl);
2526       assert(impl->end_block->predecessors->entries == 1);
2527       b.cursor = nir_after_cf_list(&impl->body);
2528       clamp_layer_output_emit(&b, &state);
2529       nir_metadata_preserve(impl, nir_metadata_dominance);
2530    }
2531    optimize_nir(vs, NULL);
2532    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2533    return true;
2534 }
2535
2536 static void
2537 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2538 {
2539    unsigned slot = var->data.location;
2540    switch (slot) {
2541    case -1:
2542    case VARYING_SLOT_POS:
2543    case VARYING_SLOT_PNTC:
2544    case VARYING_SLOT_PSIZ:
2545    case VARYING_SLOT_LAYER:
2546    case VARYING_SLOT_PRIMITIVE_ID:
2547    case VARYING_SLOT_CLIP_DIST0:
2548    case VARYING_SLOT_CULL_DIST0:
2549    case VARYING_SLOT_VIEWPORT:
2550    case VARYING_SLOT_FACE:
2551    case VARYING_SLOT_TESS_LEVEL_OUTER:
2552    case VARYING_SLOT_TESS_LEVEL_INNER:
2553       /* use a sentinel value to avoid counting later */
2554       var->data.driver_location = UINT_MAX;
2555       break;
2556
2557    default:
2558       if (var->data.patch) {
2559          assert(slot >= VARYING_SLOT_PATCH0);
2560          slot -= VARYING_SLOT_PATCH0;
2561       }
2562       if (slot_map[slot] == 0xff) {
2563          assert(*reserved < MAX_VARYING);
2564          unsigned num_slots;
2565          if (nir_is_arrayed_io(var, stage))
2566             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2567          else
2568             num_slots = glsl_count_vec4_slots(var->type, false, false);
2569          assert(*reserved + num_slots <= MAX_VARYING);
2570          for (unsigned i = 0; i < num_slots; i++)
2571             slot_map[slot + i] = (*reserved)++;
2572       }
2573       slot = slot_map[slot];
2574       assert(slot < MAX_VARYING);
2575       var->data.driver_location = slot;
2576    }
2577 }
2578
2579 ALWAYS_INLINE static bool
2580 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2581 {
2582    if (stage != MESA_SHADER_FRAGMENT)
2583       return false;
2584    return var->data.location >= VARYING_SLOT_TEX0 && 
2585           var->data.location <= VARYING_SLOT_TEX7;
2586 }
2587
2588 static bool
2589 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2590 {
2591    unsigned slot = var->data.location;
2592    switch (slot) {
2593    case VARYING_SLOT_POS:
2594    case VARYING_SLOT_PNTC:
2595    case VARYING_SLOT_PSIZ:
2596    case VARYING_SLOT_LAYER:
2597    case VARYING_SLOT_PRIMITIVE_ID:
2598    case VARYING_SLOT_CLIP_DIST0:
2599    case VARYING_SLOT_CULL_DIST0:
2600    case VARYING_SLOT_VIEWPORT:
2601    case VARYING_SLOT_FACE:
2602    case VARYING_SLOT_TESS_LEVEL_OUTER:
2603    case VARYING_SLOT_TESS_LEVEL_INNER:
2604       /* use a sentinel value to avoid counting later */
2605       var->data.driver_location = UINT_MAX;
2606       break;
2607    default:
2608       if (var->data.patch) {
2609          assert(slot >= VARYING_SLOT_PATCH0);
2610          slot -= VARYING_SLOT_PATCH0;
2611       }
2612       if (slot_map[slot] == (unsigned char)-1) {
2613          /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2614           * so keep for now and eliminate later
2615           */
2616          if (is_texcoord(stage, var)) {
2617             var->data.driver_location = -1;
2618             return true;
2619          }
2620          if (stage != MESA_SHADER_TESS_CTRL)
2621             /* dead io */
2622             return false;
2623          /* patch variables may be read in the workgroup */
2624          slot_map[slot] = (*reserved)++;
2625       }
2626       var->data.driver_location = slot_map[slot];
2627    }
2628    return true;
2629 }
2630
2631
2632 static bool
2633 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2634 {
2635    nir_variable *var = data;
2636    if (instr->type != nir_instr_type_intrinsic)
2637       return false;
2638
2639    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2640    if (intr->intrinsic != nir_intrinsic_load_deref)
2641       return false;
2642    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
2643    if (deref_var != var)
2644       return false;
2645    b->cursor = nir_before_instr(instr);
2646    nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
2647    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2648       switch (var->data.location) {
2649       case VARYING_SLOT_COL0:
2650       case VARYING_SLOT_COL1:
2651       case VARYING_SLOT_BFC0:
2652       case VARYING_SLOT_BFC1:
2653          /* default color is 0,0,0,1 */
2654          if (nir_dest_num_components(intr->dest) == 4)
2655             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2656          break;
2657       default:
2658          break;
2659       }
2660    }
2661    nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
2662    nir_instr_remove(instr);
2663    return true;
2664 }
2665
2666 void
2667 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2668 {
2669    unsigned reserved = 0;
2670    unsigned char slot_map[VARYING_SLOT_MAX];
2671    memset(slot_map, -1, sizeof(slot_map));
2672    bool do_fixup = false;
2673    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2674    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2675       /* remove injected pointsize from all but the last vertex stage */
2676       nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2677       if (var && !var->data.explicit_location) {
2678          var->data.mode = nir_var_shader_temp;
2679          nir_fixup_deref_modes(producer);
2680          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2681          optimize_nir(producer, NULL);
2682       }
2683    }
2684    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2685       /* never assign from tcs -> tes, always invert */
2686       nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
2687          assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
2688       nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
2689          if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
2690             /* this is an output, nothing more needs to be done for it to be dropped */
2691             do_fixup = true;
2692       }
2693    } else {
2694       nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
2695          assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
2696       nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
2697          if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
2698             do_fixup = true;
2699             /* input needs to be rewritten */
2700             nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
2701          }
2702       }
2703       if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2704          do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2705    }
2706    if (!do_fixup)
2707       return;
2708    nir_fixup_deref_modes(nir);
2709    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2710    optimize_nir(nir, NULL);
2711 }
2712
2713 /* all types that hit this function contain something that is 64bit */
2714 static const struct glsl_type *
2715 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2716 {
2717    if (glsl_type_is_array(type)) {
2718       const struct glsl_type *child = glsl_get_array_element(type);
2719       unsigned elements = glsl_array_size(type);
2720       unsigned stride = glsl_get_explicit_stride(type);
2721       return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2722    }
2723    /* rewrite structs recursively */
2724    if (glsl_type_is_struct_or_ifc(type)) {
2725       unsigned nmembers = glsl_get_length(type);
2726       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2727       unsigned xfb_offset = 0;
2728       for (unsigned i = 0; i < nmembers; i++) {
2729          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2730          fields[i] = *f;
2731          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2732          if (i < nmembers - 1 && xfb_offset % 8 &&
2733              (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2734               (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2735             var->data.is_xfb = true;
2736          }
2737          fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2738       }
2739       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2740    }
2741    if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2742       return type;
2743    if (doubles_only && glsl_type_is_vector_or_scalar(type))
2744       return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2745    enum glsl_base_type base_type;
2746    switch (glsl_get_base_type(type)) {
2747    case GLSL_TYPE_UINT64:
2748       base_type = GLSL_TYPE_UINT;
2749       break;
2750    case GLSL_TYPE_INT64:
2751       base_type = GLSL_TYPE_INT;
2752       break;
2753    case GLSL_TYPE_DOUBLE:
2754       base_type = GLSL_TYPE_FLOAT;
2755       break;
2756    default:
2757       unreachable("unknown 64-bit vertex attribute format!");
2758    }
2759    if (glsl_type_is_scalar(type))
2760       return glsl_vector_type(base_type, 2);
2761    unsigned num_components;
2762    if (glsl_type_is_matrix(type)) {
2763       /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
2764       unsigned vec_components = glsl_get_vector_elements(type);
2765       if (vec_components == 3)
2766          vec_components = 4;
2767       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2768    } else {
2769       num_components = glsl_get_vector_elements(type) * 2;
2770       if (num_components <= 4)
2771          return glsl_vector_type(base_type, num_components);
2772    }
2773    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2774    struct glsl_struct_field fields[8] = {0};
2775    unsigned remaining = num_components;
2776    unsigned nfields = 0;
2777    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2778       assert(i < ARRAY_SIZE(fields));
2779       fields[i].name = "";
2780       fields[i].offset = i * 16;
2781       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2782    }
2783    char buf[64];
2784    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2785    return glsl_struct_type(fields, nfields, buf, true);
2786 }
2787
2788 static const struct glsl_type *
2789 deref_is_matrix(nir_deref_instr *deref)
2790 {
2791    if (glsl_type_is_matrix(deref->type))
2792       return deref->type;
2793    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2794    if (parent)
2795       return deref_is_matrix(parent);
2796    return NULL;
2797 }
2798
2799 static bool
2800 lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
2801                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
2802 {
2803    bool func_progress = false;
2804    if (!function->impl)
2805       return false;
2806    nir_builder b;
2807    nir_builder_init(&b, function->impl);
2808    nir_foreach_block(block, function->impl) {
2809       nir_foreach_instr_safe(instr, block) {
2810          switch (instr->type) {
2811          case nir_instr_type_deref: {
2812             nir_deref_instr *deref = nir_instr_as_deref(instr);
2813             if (!(deref->modes & var->data.mode))
2814                continue;
2815             if (nir_deref_instr_get_variable(deref) != var)
2816                continue;
2817
2818             /* matrix types are special: store the original deref type for later use */
2819             const struct glsl_type *matrix = deref_is_matrix(deref);
2820             nir_deref_instr *parent = nir_deref_instr_parent(deref);
2821             if (!matrix) {
2822                /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
2823                hash_table_foreach(derefs, he) {
2824                   /* propagate parent matrix type to row deref */
2825                   if (he->key == parent)
2826                      matrix = he->data;
2827                }
2828             }
2829             if (matrix)
2830                _mesa_hash_table_insert(derefs, deref, (void*)matrix);
2831             if (deref->deref_type == nir_deref_type_var)
2832                deref->type = var->type;
2833             else
2834                deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
2835          }
2836          break;
2837          case nir_instr_type_intrinsic: {
2838             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2839             if (intr->intrinsic != nir_intrinsic_store_deref &&
2840                   intr->intrinsic != nir_intrinsic_load_deref)
2841                break;
2842             if (nir_intrinsic_get_var(intr, 0) != var)
2843                break;
2844             if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2845                   (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
2846                break;
2847             b.cursor = nir_before_instr(instr);
2848             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2849             unsigned num_components = intr->num_components * 2;
2850             nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
2851             /* this is the stored matrix type from the deref */
2852             struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2853             const struct glsl_type *matrix = he ? he->data : NULL;
2854             if (doubles_only && !matrix)
2855                break;
2856             func_progress = true;
2857             if (intr->intrinsic == nir_intrinsic_store_deref) {
2858                /* first, unpack the src data to 32bit vec2 components */
2859                for (unsigned i = 0; i < intr->num_components; i++) {
2860                   nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2861                   comp[i * 2] = nir_channel(&b, ssa, 0);
2862                   comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2863                }
2864                unsigned wrmask = nir_intrinsic_write_mask(intr);
2865                unsigned mask = 0;
2866                /* expand writemask for doubled components */
2867                for (unsigned i = 0; i < intr->num_components; i++) {
2868                   if (wrmask & BITFIELD_BIT(i))
2869                      mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2870                }
2871                if (matrix) {
2872                   /* matrix types always come from array (row) derefs */
2873                   assert(deref->deref_type == nir_deref_type_array);
2874                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2875                   /* let optimization clean up consts later */
2876                   nir_ssa_def *index = deref->arr.index.ssa;
2877                   /* this might be an indirect array index:
2878                      * - iterate over matrix columns
2879                      * - add if blocks for each column
2880                      * - perform the store in the block
2881                      */
2882                   for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2883                      nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2884                      unsigned vec_components = glsl_get_vector_elements(matrix);
2885                      /* always clamp dvec3 to 4 components */
2886                      if (vec_components == 3)
2887                         vec_components = 4;
2888                      unsigned start_component = idx * vec_components * 2;
2889                      /* struct member */
2890                      unsigned member = start_component / 4;
2891                      /* number of components remaining */
2892                      unsigned remaining = num_components;
2893                      for (unsigned i = 0; i < num_components; member++) {
2894                         if (!(mask & BITFIELD_BIT(i)))
2895                            continue;
2896                         assert(member < glsl_get_length(var_deref->type));
2897                         /* deref the rewritten struct to the appropriate vec4/vec2 */
2898                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2899                         unsigned incr = MIN2(remaining, 4);
2900                         /* assemble the write component vec */
2901                         nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
2902                         /* use the number of components being written as the writemask */
2903                         if (glsl_get_vector_elements(strct->type) > val->num_components)
2904                            val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
2905                         nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
2906                         remaining -= incr;
2907                         i += incr;
2908                      }
2909                      nir_pop_if(&b, NULL);
2910                   }
2911                   _mesa_set_add(deletes, &deref->instr);
2912                } else if (num_components <= 4) {
2913                   /* simple store case: just write out the components */
2914                   nir_ssa_def *dest = nir_vec(&b, comp, num_components);
2915                   nir_store_deref(&b, deref, dest, mask);
2916                } else {
2917                   /* writing > 4 components: access the struct and write to the appropriate vec4 members */
2918                   for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
2919                      if (!(mask & BITFIELD_MASK(4)))
2920                         continue;
2921                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2922                      nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
2923                      if (glsl_get_vector_elements(strct->type) > dest->num_components)
2924                         dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
2925                      nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
2926                      mask >>= 4;
2927                   }
2928                }
2929             } else {
2930                nir_ssa_def *dest = NULL;
2931                if (matrix) {
2932                   /* matrix types always come from array (row) derefs */
2933                   assert(deref->deref_type == nir_deref_type_array);
2934                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2935                   /* let optimization clean up consts later */
2936                   nir_ssa_def *index = deref->arr.index.ssa;
2937                   /* this might be an indirect array index:
2938                      * - iterate over matrix columns
2939                      * - add if blocks for each column
2940                      * - phi the loads using the array index
2941                      */
2942                   unsigned cols = glsl_get_matrix_columns(matrix);
2943                   nir_ssa_def *dests[4];
2944                   for (unsigned idx = 0; idx < cols; idx++) {
2945                      /* don't add an if for the final row: this will be handled in the else */
2946                      if (idx < cols - 1)
2947                         nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2948                      unsigned vec_components = glsl_get_vector_elements(matrix);
2949                      /* always clamp dvec3 to 4 components */
2950                      if (vec_components == 3)
2951                         vec_components = 4;
2952                      unsigned start_component = idx * vec_components * 2;
2953                      /* struct member */
2954                      unsigned member = start_component / 4;
2955                      /* number of components remaining */
2956                      unsigned remaining = num_components;
2957                      /* component index */
2958                      unsigned comp_idx = 0;
2959                      for (unsigned i = 0; i < num_components; member++) {
2960                         assert(member < glsl_get_length(var_deref->type));
2961                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2962                         nir_ssa_def *load = nir_load_deref(&b, strct);
2963                         unsigned incr = MIN2(remaining, 4);
2964                         /* repack the loads to 64bit */
2965                         for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
2966                            comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
2967                         remaining -= incr;
2968                         i += incr;
2969                      }
2970                      dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
2971                      if (idx < cols - 1)
2972                         nir_push_else(&b, NULL);
2973                   }
2974                   /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
2975                   for (unsigned idx = cols - 1; idx >= 1; idx--) {
2976                      nir_pop_if(&b, NULL);
2977                      dest = nir_if_phi(&b, dests[idx - 1], dest);
2978                   }
2979                   _mesa_set_add(deletes, &deref->instr);
2980                } else if (num_components <= 4) {
2981                   /* simple load case */
2982                   nir_ssa_def *load = nir_load_deref(&b, deref);
2983                   /* pack 32bit loads into 64bit: this will automagically get optimized out later */
2984                   for (unsigned i = 0; i < intr->num_components; i++) {
2985                      comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
2986                   }
2987                   dest = nir_vec(&b, comp, intr->num_components);
2988                } else {
2989                   /* writing > 4 components: access the struct and load the appropriate vec4 members */
2990                   for (unsigned i = 0; i < 2; i++, num_components -= 4) {
2991                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2992                      nir_ssa_def *load = nir_load_deref(&b, strct);
2993                      comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
2994                      if (num_components > 2)
2995                         comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
2996                   }
2997                   dest = nir_vec(&b, comp, intr->num_components);
2998                }
2999                nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
3000             }
3001             _mesa_set_add(deletes, instr);
3002             break;
3003          }
3004          break;
3005          default: break;
3006          }
3007       }
3008    }
3009    if (func_progress)
3010       nir_metadata_preserve(function->impl, nir_metadata_none);
3011    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3012    set_foreach_remove(deletes, he)
3013       nir_instr_remove((void*)he->key);
3014    return func_progress;
3015 }
3016
3017 static bool
3018 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3019                       struct set *deletes, bool doubles_only)
3020 {
3021    if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3022       return false;
3023    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3024    /* once type is rewritten, rewrite all loads and stores */
3025    nir_foreach_function(function, shader)
3026       lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3027    return true;
3028 }
3029
3030 /* rewrite all input/output variables using 32bit types and load/stores */
3031 static bool
3032 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3033 {
3034    bool progress = false;
3035    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3036    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3037    nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
3038       progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
3039    nir_foreach_function(function, shader) {
3040       nir_foreach_function_temp_variable(var, function->impl) {
3041          if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3042             continue;
3043          var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3044          progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3045       }
3046    }
3047    ralloc_free(deletes);
3048    ralloc_free(derefs);
3049    if (progress) {
3050       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3051       nir_lower_phis_to_scalar(shader, false);
3052       optimize_nir(shader, NULL);
3053    }
3054    return progress;
3055 }
3056
3057 static bool
3058 split_blocks(nir_shader *nir)
3059 {
3060    bool progress = false;
3061    bool changed = true;
3062    do {
3063       progress = false;
3064       nir_foreach_shader_out_variable(var, nir) {
3065          const struct glsl_type *base_type = glsl_without_array(var->type);
3066          nir_variable *members[32]; //can't have more than this without breaking NIR
3067          if (!glsl_type_is_struct(base_type))
3068             continue;
3069          /* TODO: arrays? */
3070          if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
3071             continue;
3072          if (glsl_count_attribute_slots(var->type, false) == 1)
3073             continue;
3074          unsigned offset = 0;
3075          for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
3076             members[i] = nir_variable_clone(var, nir);
3077             members[i]->type = glsl_get_struct_field(var->type, i);
3078             members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
3079             members[i]->data.location += offset;
3080             offset += glsl_count_attribute_slots(members[i]->type, false);
3081             nir_shader_add_variable(nir, members[i]);
3082          }
3083          nir_foreach_function(function, nir) {
3084             bool func_progress = false;
3085             if (!function->impl)
3086                continue;
3087             nir_builder b;
3088             nir_builder_init(&b, function->impl);
3089             nir_foreach_block(block, function->impl) {
3090                nir_foreach_instr_safe(instr, block) {
3091                   switch (instr->type) {
3092                   case nir_instr_type_deref: {
3093                   nir_deref_instr *deref = nir_instr_as_deref(instr);
3094                   if (!(deref->modes & nir_var_shader_out))
3095                      continue;
3096                   if (nir_deref_instr_get_variable(deref) != var)
3097                      continue;
3098                   if (deref->deref_type != nir_deref_type_struct)
3099                      continue;
3100                   nir_deref_instr *parent = nir_deref_instr_parent(deref);
3101                   if (parent->deref_type != nir_deref_type_var)
3102                      continue;
3103                   deref->modes = nir_var_shader_temp;
3104                   parent->modes = nir_var_shader_temp;
3105                   b.cursor = nir_before_instr(instr);
3106                   nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
3107                   nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
3108                   nir_instr_remove(&deref->instr);
3109                   func_progress = true;
3110                   break;
3111                   }
3112                   default: break;
3113                   }
3114                }
3115             }
3116             if (func_progress)
3117                nir_metadata_preserve(function->impl, nir_metadata_none);
3118          }
3119          var->data.mode = nir_var_shader_temp;
3120          changed = true;
3121          progress = true;
3122       }
3123    } while (progress);
3124    return changed;
3125 }
3126
3127 static void
3128 zink_shader_dump(void *words, size_t size, const char *file)
3129 {
3130    FILE *fp = fopen(file, "wb");
3131    if (fp) {
3132       fwrite(words, 1, size, fp);
3133       fclose(fp);
3134       fprintf(stderr, "wrote '%s'...\n", file);
3135    }
3136 }
3137
3138 static struct zink_shader_object
3139 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool separate)
3140 {
3141    VkShaderModuleCreateInfo smci = {0};
3142    VkShaderCreateInfoEXT sci = {0};
3143
3144    if (!spirv)
3145       spirv = zs->spirv;
3146
3147    if (zink_debug & ZINK_DEBUG_SPIRV) {
3148       char buf[256];
3149       static int i;
3150       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3151       zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3152    }
3153
3154    sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3155    sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3156    if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
3157       sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT;
3158    sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3159    sci.codeSize = spirv->num_words * sizeof(uint32_t);
3160    sci.pCode = spirv->words;
3161    sci.pName = "main";
3162    sci.setLayoutCount = 2;
3163    VkDescriptorSetLayout dsl[2] = {0};
3164    dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl;
3165    sci.pSetLayouts = dsl;
3166    VkPushConstantRange pcr;
3167    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3168    pcr.offset = 0;
3169    pcr.size = sizeof(struct zink_gfx_push_constant);
3170    sci.pushConstantRangeCount = 1;
3171    sci.pPushConstantRanges = &pcr;
3172
3173    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3174    smci.codeSize = spirv->num_words * sizeof(uint32_t);
3175    smci.pCode = spirv->words;
3176
3177 #ifndef NDEBUG
3178    if (zink_debug & ZINK_DEBUG_VALIDATION) {
3179       static const struct spirv_to_nir_options spirv_options = {
3180          .environment = NIR_SPIRV_VULKAN,
3181          .caps = {
3182             .float64 = true,
3183             .int16 = true,
3184             .int64 = true,
3185             .tessellation = true,
3186             .float_controls = true,
3187             .image_ms_array = true,
3188             .image_read_without_format = true,
3189             .image_write_without_format = true,
3190             .storage_image_ms = true,
3191             .geometry_streams = true,
3192             .storage_8bit = true,
3193             .storage_16bit = true,
3194             .variable_pointers = true,
3195             .stencil_export = true,
3196             .post_depth_coverage = true,
3197             .transform_feedback = true,
3198             .device_group = true,
3199             .draw_parameters = true,
3200             .shader_viewport_index_layer = true,
3201             .multiview = true,
3202             .physical_storage_buffer_address = true,
3203             .int64_atomics = true,
3204             .subgroup_arithmetic = true,
3205             .subgroup_basic = true,
3206             .subgroup_ballot = true,
3207             .subgroup_quad = true,
3208             .subgroup_shuffle = true,
3209             .subgroup_vote = true,
3210             .vk_memory_model = true,
3211             .vk_memory_model_device_scope = true,
3212             .int8 = true,
3213             .float16 = true,
3214             .demote_to_helper_invocation = true,
3215             .sparse_residency = true,
3216             .min_lod = true,
3217          },
3218          .ubo_addr_format = nir_address_format_32bit_index_offset,
3219          .ssbo_addr_format = nir_address_format_32bit_index_offset,
3220          .phys_ssbo_addr_format = nir_address_format_64bit_global,
3221          .push_const_addr_format = nir_address_format_logical,
3222          .shared_addr_format = nir_address_format_32bit_offset,
3223       };
3224       uint32_t num_spec_entries = 0;
3225       struct nir_spirv_specialization *spec_entries = NULL;
3226       VkSpecializationInfo sinfo = {0};
3227       VkSpecializationMapEntry me[3];
3228       uint32_t size[3] = {1,1,1};
3229       if (!zs->info.workgroup_size[0]) {
3230          sinfo.mapEntryCount = 3;
3231          sinfo.pMapEntries = &me[0];
3232          sinfo.dataSize = sizeof(uint32_t) * 3;
3233          sinfo.pData = size;
3234          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3235          for (int i = 0; i < 3; i++) {
3236             me[i].size = sizeof(uint32_t);
3237             me[i].constantID = ids[i];
3238             me[i].offset = i * sizeof(uint32_t);
3239          }
3240          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3241       }
3242       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3243                          spec_entries, num_spec_entries,
3244                          clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3245       assert(nir);
3246       ralloc_free(nir);
3247       free(spec_entries);
3248    }
3249 #endif
3250
3251    VkResult ret;
3252    struct zink_shader_object obj;
3253    if (!separate || !screen->info.have_EXT_shader_object)
3254       ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3255    else
3256       ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3257    bool success = zink_screen_handle_vkresult(screen, ret);
3258    assert(success);
3259    return obj;
3260 }
3261
3262 static void
3263 prune_io(nir_shader *nir)
3264 {
3265    nir_foreach_shader_in_variable_safe(var, nir) {
3266       if (!find_var_deref(nir, var))
3267          var->data.mode = nir_var_shader_temp;
3268    }
3269    nir_foreach_shader_out_variable_safe(var, nir) {
3270       if (!find_var_deref(nir, var))
3271          var->data.mode = nir_var_shader_temp;
3272    }
3273    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3274 }
3275
3276 static void
3277 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3278 {
3279    /* unconvert from zink_binding() */
3280    uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3281    assert(sampler_id < 32); //bitfield size for tracking
3282    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
3283 }
3284
3285 static nir_ssa_def *
3286 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
3287 {
3288    assert(var);
3289    const struct glsl_type *type = glsl_without_array(var->type);
3290    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3291    bool is_int = glsl_base_type_is_integer(ret_type);
3292    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3293    unsigned dest_size = nir_dest_bit_size(tex->dest);
3294    b->cursor = nir_after_instr(&tex->instr);
3295    unsigned num_components = nir_dest_num_components(tex->dest);
3296    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3297    if (bit_size == dest_size && !rewrite_depth)
3298       return NULL;
3299    nir_ssa_def *dest = &tex->dest.ssa;
3300    if (rewrite_depth && data) {
3301       if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3302          flag_shadow_tex(var, data);
3303       else
3304          mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3305       return NULL;
3306    }
3307    if (bit_size != dest_size) {
3308       tex->dest.ssa.bit_size = bit_size;
3309       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3310
3311       if (is_int) {
3312          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3313             dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
3314          else
3315             dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
3316       } else {
3317          dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
3318       }
3319       if (rewrite_depth)
3320          return dest;
3321       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
3322    } else if (rewrite_depth) {
3323       return dest;
3324    }
3325    return dest;
3326 }
3327
3328 struct lower_zs_swizzle_state {
3329    bool shadow_only;
3330    unsigned base_sampler_id;
3331    const struct zink_zs_swizzle_key *swizzle;
3332 };
3333
3334 static bool
3335 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3336 {
3337    struct lower_zs_swizzle_state *state = data;
3338    const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3339    assert(state->shadow_only || swizzle_key);
3340    if (instr->type != nir_instr_type_tex)
3341       return false;
3342    nir_tex_instr *tex = nir_instr_as_tex(instr);
3343    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3344        (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3345       return false;
3346    if (tex->is_shadow && tex->op == nir_texop_tg4)
3347       /* Will not even try to emulate the shadow comparison */
3348       return false;
3349    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3350    nir_variable *var = NULL;
3351    if (handle != -1)
3352       /* gtfo bindless depth texture mode */
3353       return false;
3354    nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3355       if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3356          unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3357          if (tex->texture_index >= img->data.driver_location &&
3358                tex->texture_index < img->data.driver_location + size) {
3359             var = img;
3360             break;
3361          }
3362       }
3363    }
3364    assert(var);
3365    uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3366    const struct glsl_type *type = glsl_without_array(var->type);
3367    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3368    bool is_int = glsl_base_type_is_integer(ret_type);
3369    unsigned num_components = nir_dest_num_components(tex->dest);
3370    if (tex->is_shadow)
3371       tex->is_new_style_shadow = true;
3372    nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3373    assert(dest || !state->shadow_only);
3374    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3375       return false;
3376    else if (!dest)
3377       dest = &tex->dest.ssa;
3378    else
3379       tex->dest.ssa.num_components = 1;
3380    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3381       /* these require manual swizzles */
3382       if (tex->op == nir_texop_tg4) {
3383          assert(!tex->is_shadow);
3384          nir_ssa_def *swizzle;
3385          switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3386          case PIPE_SWIZZLE_0:
3387             swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
3388             break;
3389          case PIPE_SWIZZLE_1:
3390             if (is_int)
3391                swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
3392             else
3393                swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
3394             break;
3395          default:
3396             if (!tex->component)
3397                return false;
3398             tex->component = 0;
3399             return true;
3400          }
3401          nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3402          return true;
3403       }
3404       nir_ssa_def *vec[4];
3405       for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3406          switch (swizzle_key->swizzle[sampler_id].s[i]) {
3407          case PIPE_SWIZZLE_0:
3408             vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
3409             break;
3410          case PIPE_SWIZZLE_1:
3411             if (is_int)
3412                vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
3413             else
3414                vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
3415             break;
3416          default:
3417             vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3418             break;
3419          }
3420       }
3421       nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
3422       nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3423    } else {
3424       assert(tex->is_shadow);
3425       nir_ssa_def *vec[4] = {dest, dest, dest, dest};
3426       nir_ssa_def *splat = nir_vec(b, vec, num_components);
3427       nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3428    }
3429    return true;
3430 }
3431
3432 static bool
3433 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3434 {
3435    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3436    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3437    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
3438 }
3439
3440 static bool
3441 invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
3442 {
3443    if (instr->type != nir_instr_type_intrinsic)
3444       return false;
3445    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3446    if (intr->intrinsic != nir_intrinsic_load_deref)
3447       return false;
3448    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
3449    if (deref_var->data.location != VARYING_SLOT_PNTC)
3450       return false;
3451    b->cursor = nir_after_instr(instr);
3452    nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
3453                                   nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
3454    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3455    return true;
3456 }
3457
3458 static bool
3459 invert_point_coord(nir_shader *nir)
3460 {
3461    if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
3462       return false;
3463    return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
3464 }
3465
3466 static struct zink_shader_object
3467 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool separate)
3468 {
3469    struct zink_shader_info *sinfo = &zs->sinfo;
3470    prune_io(nir);
3471
3472    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3473
3474    struct zink_shader_object obj;
3475    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3476    if (spirv)
3477       obj = zink_shader_spirv_compile(screen, zs, spirv, separate);
3478
3479    /* TODO: determine if there's any reason to cache spirv output? */
3480    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3481       zs->spirv = spirv;
3482    else
3483       ralloc_free(spirv);
3484    return obj;
3485 }
3486
3487 VkShaderModule
3488 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
3489                     nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
3490 {
3491    struct zink_shader_info *sinfo = &zs->sinfo;
3492    bool need_optimize = false;
3493    bool inlined_uniforms = false;
3494
3495    if (key) {
3496       if (key->inline_uniforms) {
3497          NIR_PASS_V(nir, nir_inline_uniforms,
3498                     nir->info.num_inlinable_uniforms,
3499                     key->base.inlined_uniform_values,
3500                     nir->info.inlinable_uniform_dw_offsets);
3501
3502          inlined_uniforms = true;
3503       }
3504
3505       /* TODO: use a separate mem ctx here for ralloc */
3506
3507       if (!screen->optimal_keys) {
3508          switch (zs->info.stage) {
3509          case MESA_SHADER_VERTEX: {
3510             uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3511             const struct zink_vs_key *vs_key = zink_vs_key(key);
3512             switch (vs_key->size) {
3513             case 4:
3514                decomposed_attrs = vs_key->u32.decomposed_attrs;
3515                decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3516                break;
3517             case 2:
3518                decomposed_attrs = vs_key->u16.decomposed_attrs;
3519                decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3520                break;
3521             case 1:
3522                decomposed_attrs = vs_key->u8.decomposed_attrs;
3523                decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3524                break;
3525             default: break;
3526             }
3527             if (decomposed_attrs || decomposed_attrs_without_w)
3528                NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3529             break;
3530          }
3531
3532          case MESA_SHADER_GEOMETRY:
3533             if (zink_gs_key(key)->lower_line_stipple) {
3534                NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3535                NIR_PASS_V(nir, nir_lower_var_copies);
3536                need_optimize = true;
3537             }
3538
3539             if (zink_gs_key(key)->lower_line_smooth) {
3540                NIR_PASS_V(nir, lower_line_smooth_gs);
3541                NIR_PASS_V(nir, nir_lower_var_copies);
3542                need_optimize = true;
3543             }
3544
3545             if (zink_gs_key(key)->lower_gl_point) {
3546                NIR_PASS_V(nir, lower_gl_point_gs);
3547                need_optimize = true;
3548             }
3549
3550             if (zink_gs_key(key)->lower_pv_mode) {
3551                NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3552                need_optimize = true; //TODO verify that this is required
3553             }
3554             break;
3555
3556          default:
3557             break;
3558          }
3559       }
3560
3561       switch (zs->info.stage) {
3562       case MESA_SHADER_VERTEX:
3563       case MESA_SHADER_TESS_EVAL:
3564       case MESA_SHADER_GEOMETRY:
3565          if (zink_vs_key_base(key)->last_vertex_stage) {
3566             if (zs->sinfo.have_xfb)
3567                sinfo->last_vertex = true;
3568
3569             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3570                NIR_PASS_V(nir, nir_lower_clip_halfz);
3571             }
3572             if (zink_vs_key_base(key)->push_drawid) {
3573                NIR_PASS_V(nir, lower_drawid);
3574             }
3575          }
3576          if (zink_vs_key_base(key)->robust_access)
3577             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3578          break;
3579       case MESA_SHADER_FRAGMENT:
3580          if (zink_fs_key(key)->lower_line_smooth) {
3581             NIR_PASS_V(nir, lower_line_smooth_fs,
3582                        zink_fs_key(key)->lower_line_stipple);
3583             need_optimize = true;
3584          } else if (zink_fs_key(key)->lower_line_stipple)
3585                NIR_PASS_V(nir, lower_line_stipple_fs);
3586
3587          if (zink_fs_key(key)->lower_point_smooth) {
3588             NIR_PASS_V(nir, nir_lower_point_smooth);
3589             NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3590             nir->info.fs.uses_discard = true;
3591             need_optimize = true;
3592          }
3593
3594          if (zink_fs_key(key)->robust_access)
3595             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3596
3597          if (!zink_fs_key_base(key)->samples &&
3598             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3599             /* VK will always use gl_SampleMask[] values even if sample count is 0,
3600             * so we need to skip this write here to mimic GL's behavior of ignoring it
3601             */
3602             nir_foreach_shader_out_variable(var, nir) {
3603                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3604                   var->data.mode = nir_var_shader_temp;
3605             }
3606             nir_fixup_deref_modes(nir);
3607             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3608             need_optimize = true;
3609          }
3610          if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3611             NIR_PASS_V(nir, lower_dual_blend);
3612          }
3613          if (zink_fs_key_base(key)->single_sample) {
3614             NIR_PASS_V(nir, nir_lower_single_sampled);
3615          }
3616          if (zink_fs_key_base(key)->coord_replace_bits)
3617             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
3618          if (zink_fs_key_base(key)->point_coord_yinvert)
3619             NIR_PASS_V(nir, invert_point_coord);
3620          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3621             nir_foreach_shader_in_variable(var, nir)
3622                var->data.sample = true;
3623             nir->info.fs.uses_sample_qualifier = true;
3624             nir->info.fs.uses_sample_shading = true;
3625          }
3626          if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3627             NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3628          if (nir->info.fs.uses_fbfetch_output) {
3629             nir_variable *fbfetch = NULL;
3630             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3631             /* old variable must be deleted to avoid spirv errors */
3632             fbfetch->data.mode = nir_var_shader_temp;
3633             nir_fixup_deref_modes(nir);
3634             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3635             need_optimize = true;
3636          }
3637          nir_foreach_shader_in_variable_safe(var, nir) {
3638             if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3639                continue;
3640             nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3641             var->data.mode = nir_var_shader_temp;
3642             nir_fixup_deref_modes(nir);
3643             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3644             need_optimize = true;
3645          }
3646          break;
3647       case MESA_SHADER_COMPUTE:
3648          if (zink_cs_key(key)->robust_access)
3649             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3650          break;
3651       default: break;
3652       }
3653       if (key->base.needs_zs_shader_swizzle) {
3654          assert(extra_data);
3655          NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
3656       }
3657       if (key->base.nonseamless_cube_mask) {
3658          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
3659          need_optimize = true;
3660       }
3661    }
3662    if (screen->driconf.inline_uniforms) {
3663       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3664       NIR_PASS_V(nir, rewrite_bo_access, screen);
3665       NIR_PASS_V(nir, remove_bo_access, zs);
3666       need_optimize = true;
3667    }
3668    if (inlined_uniforms) {
3669       optimize_nir(nir, zs);
3670
3671       /* This must be done again. */
3672       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3673                                                        nir_var_shader_out);
3674
3675       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3676       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
3677          zs->can_inline = false;
3678    } else if (need_optimize)
3679       optimize_nir(nir, zs);
3680    
3681    struct zink_shader_object obj = compile_module(screen, zs, nir, false);
3682    ralloc_free(nir);
3683    return obj.mod;
3684 }
3685
3686 struct zink_shader_object
3687 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
3688 {
3689    nir_shader *nir = zink_shader_deserialize(screen, zs);
3690    int set = nir->info.stage == MESA_SHADER_FRAGMENT;
3691    unsigned offsets[4];
3692    zink_descriptor_shader_get_binding_offsets(zs, offsets);
3693    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
3694       if (var->data.bindless)
3695          continue;
3696       var->data.descriptor_set = set;
3697       switch (var->data.mode) {
3698       case nir_var_mem_ubo:
3699             var->data.binding = !!var->data.driver_location;
3700             break;
3701       case nir_var_uniform:
3702          if (glsl_type_is_sampler(glsl_without_array(var->type)))
3703             var->data.binding += offsets[1];
3704          break;
3705       case nir_var_mem_ssbo:
3706          var->data.binding += offsets[2];
3707          break;
3708       case nir_var_image:
3709          var->data.binding += offsets[3];
3710          break;
3711       default: break;
3712       }
3713    }
3714    if (screen->driconf.inline_uniforms) {
3715       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3716       NIR_PASS_V(nir, rewrite_bo_access, screen);
3717       NIR_PASS_V(nir, remove_bo_access, zs);
3718    }
3719    optimize_nir(nir, zs);
3720    zink_descriptor_shader_init(screen, zs);
3721    struct zink_shader_object obj = compile_module(screen, zs, nir, true);
3722    ralloc_free(nir);
3723    return obj;
3724 }
3725
3726 static bool
3727 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
3728 {
3729    if (instr->type != nir_instr_type_intrinsic)
3730       return false;
3731    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3732    if (intr->intrinsic != nir_intrinsic_load_instance_id)
3733       return false;
3734    b->cursor = nir_after_instr(instr);
3735    nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
3736    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3737    return true;
3738 }
3739
3740 static bool
3741 lower_baseinstance(nir_shader *shader)
3742 {
3743    if (shader->info.stage != MESA_SHADER_VERTEX)
3744       return false;
3745    return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
3746 }
3747
3748 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
3749  * so instead we delete all those broken variables and just make new ones
3750  */
3751 static bool
3752 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
3753 {
3754    uint64_t max_ssbo_size = 0;
3755    uint64_t max_ubo_size = 0;
3756    uint64_t max_uniform_size = 0;
3757
3758    if (!shader->info.num_ssbos && !shader->info.num_ubos)
3759       return false;
3760
3761    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
3762       const struct glsl_type *type = glsl_without_array(var->type);
3763       if (type_is_counter(type))
3764          continue;
3765       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
3766       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
3767       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
3768       if (interface_type) {
3769          unsigned block_size = glsl_get_explicit_size(interface_type, true);
3770          if (glsl_get_length(interface_type) == 1) {
3771             /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
3772             const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
3773             if (glsl_type_is_array(f) && !glsl_array_size(f))
3774                block_size = 0;
3775          }
3776          if (block_size) {
3777             block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
3778             size = MAX2(size, block_size);
3779          }
3780       }
3781       if (var->data.mode == nir_var_mem_ubo) {
3782          if (var->data.driver_location)
3783             max_ubo_size = MAX2(max_ubo_size, size);
3784          else
3785             max_uniform_size = MAX2(max_uniform_size, size);
3786       } else {
3787          max_ssbo_size = MAX2(max_ssbo_size, size);
3788          if (interface_type) {
3789             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
3790                needs_size = true;
3791          }
3792       }
3793       var->data.mode = nir_var_shader_temp;
3794    }
3795    nir_fixup_deref_modes(shader);
3796    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3797    optimize_nir(shader, NULL);
3798
3799    struct glsl_struct_field field = {0};
3800    field.name = ralloc_strdup(shader, "base");
3801    if (shader->info.num_ubos) {
3802       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
3803          field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
3804          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3805                                                  glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
3806                                                  "uniform_0@32");
3807          var->interface_type = var->type;
3808          var->data.mode = nir_var_mem_ubo;
3809          var->data.driver_location = 0;
3810       }
3811
3812       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
3813       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
3814       if (num_ubos && ubos_used) {
3815          field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
3816          /* shrink array as much as possible */
3817          unsigned first_ubo = ffs(ubos_used) - 2;
3818          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
3819          num_ubos -= first_ubo;
3820          assert(num_ubos);
3821          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3822                                    glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
3823                                    "ubos@32");
3824          var->interface_type = var->type;
3825          var->data.mode = nir_var_mem_ubo;
3826          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
3827       }
3828    }
3829    if (shader->info.num_ssbos && zs->ssbos_used) {
3830       /* shrink array as much as possible */
3831       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
3832       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
3833       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
3834       assert(num_ssbos);
3835       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
3836       field.type = ssbo_type;
3837       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
3838                                               glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
3839                                               "ssbos@32");
3840       var->interface_type = var->type;
3841       var->data.mode = nir_var_mem_ssbo;
3842       var->data.driver_location = first_ssbo;
3843    }
3844    return true;
3845 }
3846
3847 static uint32_t
3848 get_src_mask_ssbo(unsigned total, nir_src src)
3849 {
3850    if (nir_src_is_const(src))
3851       return BITFIELD_BIT(nir_src_as_uint(src));
3852    return BITFIELD_MASK(total);
3853 }
3854
3855 static uint32_t
3856 get_src_mask_ubo(unsigned total, nir_src src)
3857 {
3858    if (nir_src_is_const(src))
3859       return BITFIELD_BIT(nir_src_as_uint(src));
3860    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
3861 }
3862
3863 static bool
3864 analyze_io(struct zink_shader *zs, nir_shader *shader)
3865 {
3866    bool ret = false;
3867    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
3868    nir_foreach_block(block, impl) {
3869       nir_foreach_instr(instr, block) {
3870          if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
3871             /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
3872             nir_tex_instr *tex = nir_instr_as_tex(instr);
3873             nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
3874                if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3875                   unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3876                   if (tex->texture_index >= img->data.driver_location &&
3877                      tex->texture_index < img->data.driver_location + size) {
3878                      BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
3879                      break;
3880                   }
3881                }
3882             }
3883             continue;
3884          }
3885          if (instr->type != nir_instr_type_intrinsic)
3886             continue;
3887  
3888          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3889          switch (intrin->intrinsic) {
3890          case nir_intrinsic_store_ssbo:
3891             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
3892             break;
3893  
3894          case nir_intrinsic_get_ssbo_size: {
3895             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3896             ret = true;
3897             break;
3898          }
3899          case nir_intrinsic_ssbo_atomic_fadd:
3900          case nir_intrinsic_ssbo_atomic_add:
3901          case nir_intrinsic_ssbo_atomic_imin:
3902          case nir_intrinsic_ssbo_atomic_umin:
3903          case nir_intrinsic_ssbo_atomic_imax:
3904          case nir_intrinsic_ssbo_atomic_umax:
3905          case nir_intrinsic_ssbo_atomic_and:
3906          case nir_intrinsic_ssbo_atomic_or:
3907          case nir_intrinsic_ssbo_atomic_xor:
3908          case nir_intrinsic_ssbo_atomic_exchange:
3909          case nir_intrinsic_ssbo_atomic_comp_swap:
3910          case nir_intrinsic_ssbo_atomic_fmin:
3911          case nir_intrinsic_ssbo_atomic_fmax:
3912          case nir_intrinsic_ssbo_atomic_fcomp_swap:
3913          case nir_intrinsic_load_ssbo:
3914             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3915             break;
3916          case nir_intrinsic_load_ubo:
3917          case nir_intrinsic_load_ubo_vec4:
3918             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
3919             break;
3920          default:
3921             break;
3922          }
3923       }
3924    }
3925    return ret;
3926 }
3927
3928 struct zink_bindless_info {
3929    nir_variable *bindless[4];
3930    unsigned bindless_set;
3931 };
3932
3933 /* this is a "default" bindless texture used if the shader has no texture variables */
3934 static nir_variable *
3935 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
3936 {
3937    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
3938    nir_variable *var;
3939
3940    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
3941    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
3942    var->data.descriptor_set = descriptor_set;
3943    var->data.driver_location = var->data.binding = binding;
3944    return var;
3945 }
3946
3947 /* this is a "default" bindless image used if the shader has no image variables */
3948 static nir_variable *
3949 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
3950 {
3951    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
3952    nir_variable *var;
3953
3954    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
3955    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
3956    var->data.descriptor_set = descriptor_set;
3957    var->data.driver_location = var->data.binding = binding;
3958    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
3959    return var;
3960 }
3961
3962 /* rewrite bindless instructions as array deref instructions */
3963 static bool
3964 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
3965 {
3966    struct zink_bindless_info *bindless = data;
3967
3968    if (in->type == nir_instr_type_tex) {
3969       nir_tex_instr *tex = nir_instr_as_tex(in);
3970       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3971       if (idx == -1)
3972          return false;
3973
3974       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
3975       if (!var)
3976          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
3977       b->cursor = nir_before_instr(in);
3978       nir_deref_instr *deref = nir_build_deref_var(b, var);
3979       if (glsl_type_is_array(var->type))
3980          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
3981       nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
3982
3983       /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
3984        * match up with it in contrast to normal sampler ops where things are a bit more flexible;
3985        * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
3986        * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
3987        *
3988        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
3989        * - Warhammer 40k: Dawn of War III
3990        */
3991       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
3992       unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
3993       unsigned coord_components = nir_src_num_components(tex->src[c].src);
3994       if (coord_components < needed_components) {
3995          nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
3996          nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
3997          tex->coord_components = needed_components;
3998       }
3999       return true;
4000    }
4001    if (in->type != nir_instr_type_intrinsic)
4002       return false;
4003    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4004
4005    nir_intrinsic_op op;
4006 #define OP_SWAP(OP) \
4007    case nir_intrinsic_bindless_image_##OP: \
4008       op = nir_intrinsic_image_deref_##OP; \
4009       break;
4010
4011
4012    /* convert bindless intrinsics to deref intrinsics */
4013    switch (instr->intrinsic) {
4014    OP_SWAP(atomic_add)
4015    OP_SWAP(atomic_and)
4016    OP_SWAP(atomic_comp_swap)
4017    OP_SWAP(atomic_dec_wrap)
4018    OP_SWAP(atomic_exchange)
4019    OP_SWAP(atomic_fadd)
4020    OP_SWAP(atomic_fmax)
4021    OP_SWAP(atomic_fmin)
4022    OP_SWAP(atomic_imax)
4023    OP_SWAP(atomic_imin)
4024    OP_SWAP(atomic_inc_wrap)
4025    OP_SWAP(atomic_or)
4026    OP_SWAP(atomic_umax)
4027    OP_SWAP(atomic_umin)
4028    OP_SWAP(atomic_xor)
4029    OP_SWAP(format)
4030    OP_SWAP(load)
4031    OP_SWAP(order)
4032    OP_SWAP(samples)
4033    OP_SWAP(size)
4034    OP_SWAP(store)
4035    default:
4036       return false;
4037    }
4038
4039    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4040    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4041    if (!var)
4042       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4043    instr->intrinsic = op;
4044    b->cursor = nir_before_instr(in);
4045    nir_deref_instr *deref = nir_build_deref_var(b, var);
4046    if (glsl_type_is_array(var->type))
4047       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4048    nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
4049    return true;
4050 }
4051
4052 static bool
4053 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4054 {
4055    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4056       return false;
4057    nir_fixup_deref_modes(shader);
4058    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4059    optimize_nir(shader, NULL);
4060    return true;
4061 }
4062
4063 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4064 static bool
4065 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
4066 {
4067    if (in->type != nir_instr_type_intrinsic)
4068       return false;
4069    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4070    if (instr->intrinsic != nir_intrinsic_load_deref &&
4071        instr->intrinsic != nir_intrinsic_store_deref)
4072       return false;
4073
4074    nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
4075    nir_variable *var = nir_deref_instr_get_variable(src_deref);
4076    if (var->data.bindless)
4077       return false;
4078    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4079       return false;
4080    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4081       return false;
4082
4083    var->type = glsl_int64_t_type();
4084    var->data.bindless = 1;
4085    b->cursor = nir_before_instr(in);
4086    nir_deref_instr *deref = nir_build_deref_var(b, var);
4087    if (instr->intrinsic == nir_intrinsic_load_deref) {
4088        nir_ssa_def *def = nir_load_deref(b, deref);
4089        nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
4090        nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
4091    } else {
4092       nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
4093    }
4094    nir_instr_remove(in);
4095    nir_instr_remove(&src_deref->instr);
4096    return true;
4097 }
4098
4099 static bool
4100 lower_bindless_io(nir_shader *shader)
4101 {
4102    return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
4103 }
4104
4105 static uint32_t
4106 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4107 {
4108    if (stage == MESA_SHADER_NONE) {
4109       unreachable("not supported");
4110    } else {
4111       unsigned base = stage;
4112       /* clamp compute bindings for better driver efficiency */
4113       if (gl_shader_stage_is_compute(stage))
4114          base = 0;
4115       switch (type) {
4116       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4117       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4118          return base * 2 + !!index;
4119
4120       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4121          assert(stage == MESA_SHADER_KERNEL);
4122          FALLTHROUGH;
4123       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4124          if (stage == MESA_SHADER_KERNEL) {
4125             assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4126             return index + PIPE_MAX_SAMPLERS;
4127          }
4128          FALLTHROUGH;
4129       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4130          assert(index < PIPE_MAX_SAMPLERS);
4131          assert(stage != MESA_SHADER_KERNEL);
4132          return (base * PIPE_MAX_SAMPLERS) + index;
4133
4134       case VK_DESCRIPTOR_TYPE_SAMPLER:
4135          assert(index < PIPE_MAX_SAMPLERS);
4136          assert(stage == MESA_SHADER_KERNEL);
4137          return index;
4138
4139       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4140          return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4141
4142       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4143       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4144          assert(index < ZINK_MAX_SHADER_IMAGES);
4145          if (stage == MESA_SHADER_KERNEL)
4146             return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4147          return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4148
4149       default:
4150          unreachable("unexpected type");
4151       }
4152    }
4153 }
4154
4155 static void
4156 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4157 {
4158    if (glsl_type_is_struct(type)) {
4159       for (unsigned i = 0; i < glsl_get_length(type); i++)
4160          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4161       return;
4162    }
4163
4164    /* just a random scalar in a struct */
4165    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4166       return;
4167
4168    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4169    unsigned binding;
4170    switch (vktype) {
4171       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4172          binding = 0;
4173          break;
4174       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4175          binding = 1;
4176          break;
4177       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4178          binding = 2;
4179          break;
4180       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4181          binding = 3;
4182          break;
4183       default:
4184          unreachable("unknown");
4185    }
4186    if (!bindless->bindless[binding]) {
4187       bindless->bindless[binding] = nir_variable_clone(var, nir);
4188       bindless->bindless[binding]->data.bindless = 0;
4189       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4190       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4191       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4192       if (!bindless->bindless[binding]->data.image.format)
4193          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4194       nir_shader_add_variable(nir, bindless->bindless[binding]);
4195    } else {
4196       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4197    }
4198    var->data.mode = nir_var_shader_temp;
4199 }
4200
4201 static bool
4202 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4203 {
4204    struct zink_screen *screen = data;
4205    if (instr->type != nir_instr_type_tex)
4206       return false;
4207    nir_tex_instr *tex = nir_instr_as_tex(instr);
4208    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4209       return false;
4210    if (tex->is_sparse && screen->need_2D_sparse) {
4211       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4212       mesa_loge("unhandled/unsupported 1D sparse texture!");
4213       abort();
4214    }
4215    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4216    b->cursor = nir_before_instr(instr);
4217    tex->coord_components++;
4218    unsigned srcs[] = {
4219       nir_tex_src_coord,
4220       nir_tex_src_offset,
4221       nir_tex_src_ddx,
4222       nir_tex_src_ddy,
4223    };
4224    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4225       unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4226       if (c == -1)
4227          continue;
4228       if (tex->src[c].src.ssa->num_components == tex->coord_components)
4229          continue;
4230       nir_ssa_def *def;
4231       nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4232       if (tex->src[c].src.ssa->num_components == 1)
4233          def = nir_vec2(b, tex->src[c].src.ssa, zero);
4234       else
4235          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4236       nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
4237    }
4238    b->cursor = nir_after_instr(instr);
4239    unsigned needed_components = nir_tex_instr_dest_size(tex);
4240    unsigned num_components = tex->dest.ssa.num_components;
4241    if (needed_components > num_components) {
4242       tex->dest.ssa.num_components = needed_components;
4243       assert(num_components < 3);
4244       /* take either xz or just x since this is promoted to 2D from 1D */
4245       uint32_t mask = num_components == 2 ? (1|4) : 1;
4246       nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
4247       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
4248    }
4249    return true;
4250 }
4251
4252 static bool
4253 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4254 {
4255    bool found = false;
4256    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4257       const struct glsl_type *type = glsl_without_array(var->type);
4258       unsigned length = glsl_get_length(var->type);
4259       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4260          continue;
4261       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4262       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4263
4264       found = true;
4265    }
4266    if (found)
4267       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4268    return found;
4269 }
4270
4271 static void
4272 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4273 {
4274    nir_foreach_function(function, shader) {
4275       if (!function->impl)
4276          continue;
4277       nir_foreach_block_safe(block, function->impl) {
4278          nir_foreach_instr_safe(instr, block) {
4279             if (instr->type == nir_instr_type_tex) {
4280                nir_tex_instr *tex = nir_instr_as_tex(instr);
4281                zs->sinfo.have_sparse |= tex->is_sparse;
4282             }
4283             if (instr->type != nir_instr_type_intrinsic)
4284                continue;
4285             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4286             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4287                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4288                 intr->intrinsic == nir_intrinsic_image_deref_store ||
4289                 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4290                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4291                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4292                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4293                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4294                 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4295                 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4296                 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4297                 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4298                 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4299                 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4300                 intr->intrinsic == nir_intrinsic_image_deref_size ||
4301                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4302                 intr->intrinsic == nir_intrinsic_image_deref_format ||
4303                 intr->intrinsic == nir_intrinsic_image_deref_order) {
4304
4305                 nir_variable *var =
4306                    nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
4307
4308                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4309                 const unsigned size =
4310                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4311                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4312                                  var->data.binding + (MAX2(size, 1) - 1));
4313             }
4314             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4315                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4316                zs->sinfo.have_sparse = true;
4317
4318             static bool warned = false;
4319             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4320                switch (intr->intrinsic) {
4321                case nir_intrinsic_image_deref_atomic_add: {
4322                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
4323                   if (util_format_is_float(var->data.image.format))
4324                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4325                   break;
4326                }
4327                default:
4328                   break;
4329                }
4330             }
4331          }
4332       }
4333    }
4334 }
4335
4336 static bool
4337 is_residency_code(nir_ssa_def *src)
4338 {
4339    nir_instr *parent = src->parent_instr;
4340    while (1) {
4341       if (parent->type == nir_instr_type_intrinsic) {
4342          ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4343          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4344          return false;
4345       }
4346       if (parent->type == nir_instr_type_tex)
4347          return true;
4348       assert(parent->type == nir_instr_type_alu);
4349       nir_alu_instr *alu = nir_instr_as_alu(parent);
4350       parent = alu->src[0].src.ssa->parent_instr;
4351    }
4352 }
4353
4354 static bool
4355 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
4356 {
4357    if (in->type != nir_instr_type_intrinsic)
4358       return false;
4359    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4360    if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
4361       b->cursor = nir_before_instr(&instr->instr);
4362       nir_ssa_def *src0;
4363       if (is_residency_code(instr->src[0].ssa))
4364          src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
4365       else
4366          src0 = instr->src[0].ssa;
4367       nir_ssa_def *src1;
4368       if (is_residency_code(instr->src[1].ssa))
4369          src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
4370       else
4371          src1 = instr->src[1].ssa;
4372       nir_ssa_def *def = nir_iand(b, src0, src1);
4373       nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
4374       nir_instr_remove(in);
4375       return true;
4376    }
4377    if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
4378       return false;
4379
4380    /* vulkan vec can only be a vec4, but this is (maybe) vec5,
4381     * so just rewrite as the first component since ntv is going to use a different
4382     * method for storing the residency value anyway
4383     */
4384    b->cursor = nir_before_instr(&instr->instr);
4385    nir_instr *parent = instr->src[0].ssa->parent_instr;
4386    if (is_residency_code(instr->src[0].ssa)) {
4387       assert(parent->type == nir_instr_type_alu);
4388       nir_alu_instr *alu = nir_instr_as_alu(parent);
4389       nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
4390       nir_instr_remove(parent);
4391    } else {
4392       nir_ssa_def *src;
4393       if (parent->type == nir_instr_type_intrinsic) {
4394          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4395          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4396          src = intr->src[0].ssa;
4397       } else {
4398          assert(parent->type == nir_instr_type_alu);
4399          nir_alu_instr *alu = nir_instr_as_alu(parent);
4400          src = alu->src[0].src.ssa;
4401       }
4402       if (instr->dest.ssa.bit_size != 32) {
4403          if (instr->dest.ssa.bit_size == 1)
4404             src = nir_ieq_imm(b, src, 1);
4405          else
4406             src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
4407       }
4408       nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
4409       nir_instr_remove(in);
4410    }
4411    return true;
4412 }
4413
4414 static bool
4415 lower_sparse(nir_shader *shader)
4416 {
4417    return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
4418 }
4419
4420 static bool
4421 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4422 {
4423    if (in->type != nir_instr_type_tex)
4424       return false;
4425    nir_tex_instr *tex = nir_instr_as_tex(in);
4426    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4427       return false;
4428    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4429    nir_variable *var = NULL;
4430    if (handle != -1) {
4431       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4432    } else {
4433       nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4434          if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4435             unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4436             if (tex->texture_index >= img->data.driver_location &&
4437                 tex->texture_index < img->data.driver_location + size) {
4438                var = img;
4439                break;
4440             }
4441          }
4442       }
4443    }
4444    return !!rewrite_tex_dest(b, tex, var, data);
4445 }
4446
4447 static bool
4448 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4449 {
4450    return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
4451 }
4452
4453 static bool
4454 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4455 {
4456    if (in->type != nir_instr_type_alu)
4457       return false;
4458    nir_alu_instr *alu = nir_instr_as_alu(in);
4459    switch (alu->op) {
4460    case nir_op_ubitfield_extract:
4461    case nir_op_ibitfield_extract:
4462    case nir_op_bitfield_insert:
4463       break;
4464    default:
4465       return false;
4466    }
4467    unsigned num_components = nir_dest_num_components(alu->dest.dest);
4468    if (num_components == 1)
4469       return false;
4470    b->cursor = nir_before_instr(in);
4471    nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
4472    for (unsigned i = 0; i < num_components; i++) {
4473       if (alu->op == nir_op_bitfield_insert)
4474          dests[i] = nir_bitfield_insert(b,
4475                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4476                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4477                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4478                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4479       else if (alu->op == nir_op_ubitfield_extract)
4480          dests[i] = nir_ubitfield_extract(b,
4481                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4482                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4483                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4484       else
4485          dests[i] = nir_ibitfield_extract(b,
4486                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4487                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4488                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4489    }
4490    nir_ssa_def *dest = nir_vec(b, dests, num_components);
4491    nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
4492    nir_instr_remove(in);
4493    return true;
4494 }
4495
4496
4497 static bool
4498 split_bitfields(nir_shader *shader)
4499 {
4500    return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
4501 }
4502
4503 static void
4504 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4505 {
4506    nir_foreach_function(function, nir) {
4507       nir_foreach_block(block, function->impl) {
4508          nir_foreach_instr_safe(instr, block) {
4509             if (instr->type != nir_instr_type_deref)
4510                continue;
4511             nir_deref_instr *deref = nir_instr_as_deref(instr);
4512             nir_variable *img = nir_deref_instr_get_variable(deref);
4513             if (img != var)
4514                continue;
4515             if (glsl_type_is_array(var->type)) {
4516                if (deref->deref_type == nir_deref_type_array)
4517                   deref->type = glsl_without_array(var->type);
4518                else
4519                   deref->type = var->type;
4520             } else {
4521                deref->type = var->type;
4522             }
4523          }
4524       }
4525    }
4526 }
4527
4528 static void
4529 type_image(nir_shader *nir, nir_variable *var)
4530 {
4531    nir_foreach_function(function, nir) {
4532       nir_foreach_block(block, function->impl) {
4533          nir_foreach_instr_safe(instr, block) {
4534             if (instr->type != nir_instr_type_intrinsic)
4535                continue;
4536             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4537             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4538                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4539                intr->intrinsic == nir_intrinsic_image_deref_store ||
4540                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4541                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4542                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4543                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4544                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4545                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4546                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4547                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4548                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4549                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4550                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4551                intr->intrinsic == nir_intrinsic_image_deref_samples ||
4552                intr->intrinsic == nir_intrinsic_image_deref_format ||
4553                intr->intrinsic == nir_intrinsic_image_deref_order) {
4554                nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4555                nir_variable *img = nir_deref_instr_get_variable(deref);
4556                if (img != var)
4557                   continue;
4558                nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4559                const struct glsl_type *type = glsl_without_array(var->type);
4560                if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4561                   assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4562                   continue;
4563                }
4564                const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4565                if (glsl_type_is_array(var->type))
4566                   img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4567                var->type = img_type;
4568                rewrite_cl_derefs(nir, var);
4569                return;
4570             }
4571          }
4572       }
4573    }
4574    nir_foreach_function(function, nir) {
4575       nir_foreach_block(block, function->impl) {
4576          nir_foreach_instr_safe(instr, block) {
4577             if (instr->type != nir_instr_type_intrinsic)
4578                continue;
4579             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4580             if (intr->intrinsic != nir_intrinsic_image_deref_size)
4581                continue;
4582             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4583             nir_variable *img = nir_deref_instr_get_variable(deref);
4584             if (img != var)
4585                continue;
4586             nir_alu_type alu_type = nir_type_uint32;
4587             const struct glsl_type *type = glsl_without_array(var->type);
4588             if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4589                continue;
4590             }
4591             const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4592             if (glsl_type_is_array(var->type))
4593                img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4594             var->type = img_type;
4595             rewrite_cl_derefs(nir, var);
4596             return;
4597          }
4598       }
4599    }
4600    var->data.mode = nir_var_shader_temp;
4601 }
4602
4603 static nir_variable *
4604 find_sampler_var(nir_shader *nir, unsigned texture_index)
4605 {
4606    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4607       unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4608       if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
4609           (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
4610          return var;
4611    }
4612    return NULL;
4613 }
4614
4615 static bool
4616 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4617 {
4618    bool progress = false;
4619    nir_foreach_function(function, nir) {
4620       nir_foreach_block(block, function->impl) {
4621          nir_foreach_instr(instr, block) {
4622             if (instr->type != nir_instr_type_tex)
4623                continue;
4624             nir_tex_instr *tex = nir_instr_as_tex(instr);
4625             switch (tex->op) {
4626             case nir_texop_lod:
4627             case nir_texop_txs:
4628             case nir_texop_query_levels:
4629             case nir_texop_texture_samples:
4630             case nir_texop_samples_identical:
4631                continue;
4632             default:
4633                break;
4634             }
4635             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4636             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4637             assert(var);
4638             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4639                continue;
4640             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4641             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4642             if (size > 1)
4643                img_type = glsl_array_type(img_type, size, 0);
4644             var->type = img_type;
4645             progress = true;
4646          }
4647       }
4648    }
4649    nir_foreach_function(function, nir) {
4650       nir_foreach_block(block, function->impl) {
4651          nir_foreach_instr(instr, block) {
4652             if (instr->type != nir_instr_type_tex)
4653                continue;
4654             nir_tex_instr *tex = nir_instr_as_tex(instr);
4655             switch (tex->op) {
4656             case nir_texop_lod:
4657             case nir_texop_txs:
4658             case nir_texop_query_levels:
4659             case nir_texop_texture_samples:
4660             case nir_texop_samples_identical:
4661                break;
4662             default:
4663                continue;
4664             }
4665             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4666             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4667             assert(var);
4668             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4669                continue;
4670             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4671             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4672             if (size > 1)
4673                img_type = glsl_array_type(img_type, size, 0);
4674             var->type = img_type;
4675             progress = true;
4676          }
4677       }
4678    }
4679    return progress;
4680 }
4681
4682 static bool
4683 delete_samplers(nir_shader *nir)
4684 {
4685    bool progress = false;
4686    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4687       if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4688          var->data.mode = nir_var_shader_temp;
4689          progress = true;
4690       }
4691    }
4692    return progress;
4693 }
4694
4695 static bool
4696 type_images(nir_shader *nir, unsigned *sampler_mask)
4697 {
4698    bool progress = false;
4699    progress |= delete_samplers(nir);
4700    progress |= type_sampler_vars(nir, sampler_mask);
4701    nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4702       type_image(nir, var);
4703       progress = true;
4704    }
4705    return progress;
4706 }
4707
4708 /* attempt to assign io for separate shaders */
4709 static bool
4710 fixup_io_locations(nir_shader *nir)
4711 {
4712    nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4713    /* i/o interface blocks are required to be EXACT matches between stages:
4714     * iterate over all locations and set locations incrementally
4715     */
4716    unsigned slot = 0;
4717    for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4718       if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4719          continue;
4720       nir_variable *var = nir_find_variable_with_location(nir, mode, i);
4721       if (!var) {
4722          /* locations used between stages are not required to be contiguous */
4723          if (i >= VARYING_SLOT_VAR0)
4724             slot++;
4725          continue;
4726       }
4727       unsigned size;
4728       /* ensure variable is given enough slots */
4729       if (nir_is_arrayed_io(var, nir->info.stage))
4730          size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4731       else
4732          size = glsl_count_vec4_slots(var->type, false, false);
4733       var->data.driver_location = slot;
4734       slot += size;
4735       /* ensure the consumed slots aren't double iterated */
4736       i += size - 1;
4737    }
4738    return true;
4739 }
4740
4741 static uint32_t
4742 zink_flat_flags(struct nir_shader *shader)
4743 {
4744    uint32_t flat_flags = 0, c = 0;
4745    nir_foreach_shader_in_variable(var, shader) {
4746       if (var->data.interpolation == INTERP_MODE_FLAT)
4747          flat_flags |= 1u << (c++);
4748    }
4749
4750    return flat_flags;
4751 }
4752
4753 struct zink_shader *
4754 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4755                    const struct pipe_stream_output_info *so_info)
4756 {
4757    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4758    bool have_psiz = false;
4759
4760    ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4761                         nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4762
4763    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4764    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4765
4766    util_queue_fence_init(&ret->precompile.fence);
4767    util_dynarray_init(&ret->pipeline_libs, ret);
4768    ret->hash = _mesa_hash_pointer(ret);
4769
4770    ret->programs = _mesa_pointer_set_create(NULL);
4771    simple_mtx_init(&ret->lock, mtx_plain);
4772
4773    nir_variable_mode indirect_derefs_modes = 0;
4774    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4775        nir->info.stage == MESA_SHADER_TESS_EVAL)
4776       indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4777
4778    NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
4779               UINT32_MAX);
4780
4781    if (nir->info.stage < MESA_SHADER_COMPUTE)
4782       create_gfx_pushconst(nir);
4783
4784    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4785             nir->info.stage == MESA_SHADER_TESS_EVAL)
4786       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4787
4788    if (nir->info.stage < MESA_SHADER_FRAGMENT)
4789       have_psiz = check_psiz(nir);
4790    if (nir->info.stage == MESA_SHADER_FRAGMENT)
4791       ret->flat_flags = zink_flat_flags(nir);
4792
4793    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4794       NIR_PASS_V(nir, fixup_io_locations);
4795
4796    NIR_PASS_V(nir, lower_basevertex);
4797    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4798    NIR_PASS_V(nir, lower_baseinstance);
4799    NIR_PASS_V(nir, lower_sparse);
4800    NIR_PASS_V(nir, split_bitfields);
4801    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4802
4803    if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4804       NIR_PASS_V(nir, nir_lower_discard_or_demote,
4805                  screen->driconf.glsl_correct_derivatives_after_discard ||
4806                  nir->info.use_legacy_math_rules);
4807    }
4808
4809    if (screen->need_2D_zs)
4810       NIR_PASS_V(nir, lower_1d_shadow, screen);
4811
4812    {
4813       nir_lower_subgroups_options subgroup_options = {0};
4814       subgroup_options.lower_to_scalar = true;
4815       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4816       subgroup_options.ballot_bit_size = 32;
4817       subgroup_options.ballot_components = 4;
4818       subgroup_options.lower_subgroup_masks = true;
4819       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4820          subgroup_options.subgroup_size = 1;
4821          subgroup_options.lower_vote_trivial = true;
4822       }
4823       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4824    }
4825
4826    if (so_info && so_info->num_outputs)
4827       NIR_PASS_V(nir, split_blocks);
4828
4829    optimize_nir(nir, NULL);
4830    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4831    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4832                                           nir_lower_demote_if_to_cf |
4833                                           nir_lower_terminate_if_to_cf));
4834    NIR_PASS_V(nir, nir_lower_fragcolor,
4835          nir->info.fs.color_is_dual_source ? 1 : 8);
4836    NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4837    bool needs_size = analyze_io(ret, nir);
4838    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4839    /* run in compile if there could be inlined uniforms */
4840    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4841       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
4842       NIR_PASS_V(nir, rewrite_bo_access, screen);
4843       NIR_PASS_V(nir, remove_bo_access, ret);
4844    }
4845
4846    if (zink_debug & ZINK_DEBUG_NIR) {
4847       fprintf(stderr, "NIR shader:\n---8<---\n");
4848       nir_print_shader(nir, stderr);
4849       fprintf(stderr, "---8<---\n");
4850    }
4851
4852    struct zink_bindless_info bindless = {0};
4853    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4854    bool has_bindless_io = false;
4855    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4856       var->data.is_xfb = false;
4857       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4858          has_bindless_io = true;
4859          break;
4860       }
4861    }
4862    if (has_bindless_io)
4863       NIR_PASS_V(nir, lower_bindless_io);
4864
4865    optimize_nir(nir, NULL);
4866    prune_io(nir);
4867
4868    scan_nir(screen, nir, ret);
4869    unsigned sampler_mask = 0;
4870    if (nir->info.stage == MESA_SHADER_KERNEL) {
4871       NIR_PASS_V(nir, type_images, &sampler_mask);
4872       enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4873       VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4874       u_foreach_bit(s, sampler_mask) {
4875          ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4876          ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4877          ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4878          ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4879          ret->num_bindings[ztype]++;
4880       }
4881       ret->sinfo.sampler_mask = sampler_mask;
4882    }
4883
4884    unsigned ubo_binding_mask = 0;
4885    unsigned ssbo_binding_mask = 0;
4886    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4887       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4888                                         nir_var_image |
4889                                         nir_var_mem_ubo |
4890                                         nir_var_mem_ssbo)) {
4891          enum zink_descriptor_type ztype;
4892          const struct glsl_type *type = glsl_without_array(var->type);
4893          if (var->data.mode == nir_var_mem_ubo) {
4894             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4895             /* buffer 0 is a push descriptor */
4896             var->data.descriptor_set = !!var->data.driver_location;
4897             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
4898                                 zink_binding(nir->info.stage,
4899                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
4900                                              var->data.driver_location,
4901                                              screen->compact_descriptors);
4902             assert(var->data.driver_location || var->data.binding < 10);
4903             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
4904             int binding = var->data.binding;
4905
4906             if (!var->data.driver_location) {
4907                ret->has_uniforms = true;
4908             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
4909                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4910                ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
4911                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4912                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4913                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4914                ret->num_bindings[ztype]++;
4915                ubo_binding_mask |= BITFIELD_BIT(binding);
4916             }
4917          } else if (var->data.mode == nir_var_mem_ssbo) {
4918             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
4919             var->data.descriptor_set = screen->desc_set_id[ztype];
4920             var->data.binding = zink_binding(nir->info.stage,
4921                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4922                                              var->data.driver_location,
4923                                              screen->compact_descriptors);
4924             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
4925                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4926                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4927                ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
4928                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4929                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4930                ret->num_bindings[ztype]++;
4931                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
4932             }
4933          } else {
4934             assert(var->data.mode == nir_var_uniform ||
4935                    var->data.mode == nir_var_image);
4936             if (var->data.bindless) {
4937                ret->bindless = true;
4938                handle_bindless_var(nir, var, type, &bindless);
4939             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
4940                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4941                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4942                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
4943                ztype = zink_desc_type_from_vktype(vktype);
4944                if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
4945                   ret->num_texel_buffers++;
4946                var->data.driver_location = var->data.binding;
4947                var->data.descriptor_set = screen->desc_set_id[ztype];
4948                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
4949                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4950                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4951                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4952                if (glsl_type_is_array(var->type))
4953                   ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
4954                else
4955                   ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4956                ret->num_bindings[ztype]++;
4957             } else if (var->data.mode == nir_var_uniform) {
4958                /* this is a dead uniform */
4959                var->data.mode = 0;
4960                exec_node_remove(&var->node);
4961             }
4962          }
4963       }
4964    }
4965    bool bindless_lowered = false;
4966    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
4967    ret->bindless |= bindless_lowered;
4968
4969    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
4970       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
4971    if (nir->info.stage != MESA_SHADER_KERNEL)
4972       NIR_PASS_V(nir, match_tex_dests, ret);
4973
4974    if (!nir->info.internal)
4975       nir_foreach_shader_out_variable(var, nir)
4976          var->data.explicit_xfb_buffer = 0;
4977    if (so_info && so_info->num_outputs)
4978       update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
4979    else if (have_psiz) {
4980       bool have_fake_psiz = false;
4981       nir_variable *psiz = NULL;
4982       nir_foreach_shader_out_variable(var, nir) {
4983          if (var->data.location == VARYING_SLOT_PSIZ) {
4984             if (!var->data.explicit_location)
4985                have_fake_psiz = true;
4986             else
4987                psiz = var;
4988          }
4989       }
4990       if (have_fake_psiz && psiz) {
4991          psiz->data.mode = nir_var_shader_temp;
4992          nir_fixup_deref_modes(nir);
4993          NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4994       }
4995    }
4996    zink_shader_serialize_blob(nir, &ret->blob);
4997    memcpy(&ret->info, &nir->info, sizeof(nir->info));
4998
4999    ret->can_inline = true;
5000
5001    return ret;
5002 }
5003
5004 char *
5005 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
5006 {
5007    struct zink_screen *screen = zink_screen(pscreen);
5008    nir_shader *nir = nirptr;
5009
5010    nir_lower_tex_options tex_opts = {
5011       .lower_invalid_implicit_lod = true,
5012    };
5013    /*
5014       Sampled Image must be an object whose type is OpTypeSampledImage.
5015       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
5016       or Rect, and the Arrayed and MS operands must be 0.
5017       - SPIRV, OpImageSampleProj* opcodes
5018     */
5019    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
5020                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
5021    tex_opts.lower_txp_array = true;
5022    if (!screen->info.feats.features.shaderImageGatherExtended)
5023       tex_opts.lower_tg4_offsets = true;
5024    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
5025    optimize_nir(nir, NULL);
5026    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
5027    if (screen->driconf.inline_uniforms)
5028       nir_find_inlinable_uniforms(nir);
5029
5030    return NULL;
5031 }
5032
5033 void
5034 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5035 {
5036    _mesa_set_destroy(shader->programs, NULL);
5037    util_queue_fence_wait(&shader->precompile.fence);
5038    util_queue_fence_destroy(&shader->precompile.fence);
5039    zink_descriptor_shader_deinit(screen, shader);
5040    if (screen->info.have_EXT_shader_object) {
5041       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
5042    } else {
5043       if (shader->precompile.obj.mod)
5044          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
5045       if (shader->precompile.gpl)
5046          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
5047    }
5048    blob_finish(&shader->blob);
5049    ralloc_free(shader->spirv);
5050    free(shader->precompile.bindings);
5051    ralloc_free(shader);
5052 }
5053
5054 void
5055 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5056 {
5057    assert(shader->info.stage != MESA_SHADER_COMPUTE);
5058    util_queue_fence_wait(&shader->precompile.fence);
5059    set_foreach(shader->programs, entry) {
5060       struct zink_gfx_program *prog = (void*)entry->key;
5061       gl_shader_stage stage = shader->info.stage;
5062       assert(stage < ZINK_GFX_SHADER_COUNT);
5063       unsigned stages_present = prog->stages_present;
5064       if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
5065             prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
5066          stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
5067       unsigned idx = zink_program_cache_stages(stages_present);
5068       if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
5069           (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
5070          struct hash_table *ht = &prog->ctx->program_cache[idx];
5071          simple_mtx_lock(&prog->ctx->program_lock[idx]);
5072          struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
5073          assert(he && he->data == prog);
5074          _mesa_hash_table_remove(ht, he);
5075          prog->base.removed = true;
5076          simple_mtx_unlock(&prog->ctx->program_lock[idx]);
5077          util_queue_fence_wait(&prog->base.cache_fence);
5078
5079          for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
5080             for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
5081                hash_table_foreach(&prog->pipelines[r][i], entry) {
5082                   struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
5083
5084                   util_queue_fence_wait(&pc_entry->fence);
5085                }
5086             }
5087          }
5088
5089       }
5090       while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
5091          struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
5092          //this condition is equivalent to verifying that, for each bit stages_present_i in stages_present,
5093          //stages_present_i implies libs->stages_present_i
5094          if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
5095             continue;
5096          if (!libs->removed) {
5097             libs->removed = true;
5098             simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
5099             _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
5100             simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
5101          }
5102          zink_gfx_lib_cache_unref(screen, libs);
5103       }
5104       if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
5105          prog->shaders[stage] = NULL;
5106          prog->stages_remaining &= ~BITFIELD_BIT(stage);
5107       }
5108       /* only remove generated tcs during parent tes destruction */
5109       if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
5110          prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
5111       if (stage != MESA_SHADER_FRAGMENT &&
5112           prog->shaders[MESA_SHADER_GEOMETRY] &&
5113           prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
5114           shader) {
5115          prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
5116       }
5117       zink_gfx_program_reference(screen, &prog, NULL);
5118    }
5119    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
5120        shader->non_fs.generated_tcs) {
5121       /* automatically destroy generated tcs shaders when tes is destroyed */
5122       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
5123       shader->non_fs.generated_tcs = NULL;
5124    }
5125    for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
5126       for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
5127          if (shader->info.stage != MESA_SHADER_FRAGMENT &&
5128              shader->non_fs.generated_gs[i][j]) {
5129             /* automatically destroy generated gs shaders when owner is destroyed */
5130             zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
5131             shader->non_fs.generated_gs[i][j] = NULL;
5132          }
5133       }
5134    }
5135    zink_shader_free(screen, shader);
5136 }
5137
5138
5139 struct zink_shader_object
5140 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
5141 {
5142    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
5143    /* shortcut all the nir passes since we just have to change this one word */
5144    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
5145    return zink_shader_spirv_compile(screen, zs, NULL, false);
5146 }
5147
5148 /* creating a passthrough tcs shader that's roughly:
5149
5150 #version 150
5151 #extension GL_ARB_tessellation_shader : require
5152
5153 in vec4 some_var[gl_MaxPatchVertices];
5154 out vec4 some_var_out;
5155
5156 layout(push_constant) uniform tcsPushConstants {
5157     layout(offset = 0) float TessLevelInner[2];
5158     layout(offset = 8) float TessLevelOuter[4];
5159 } u_tcsPushConstants;
5160 layout(vertices = $vertices_per_patch) out;
5161 void main()
5162 {
5163   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
5164   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
5165   some_var_out = some_var[gl_InvocationID];
5166 }
5167
5168 */
5169 struct zink_shader *
5170 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
5171 {
5172    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5173    util_queue_fence_init(&ret->precompile.fence);
5174    ret->hash = _mesa_hash_pointer(ret);
5175    ret->programs = _mesa_pointer_set_create(NULL);
5176    simple_mtx_init(&ret->lock, mtx_plain);
5177
5178    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
5179    nir_function *fn = nir_function_create(nir, "main");
5180    fn->is_entrypoint = true;
5181    nir_function_impl *impl = nir_function_impl_create(fn);
5182
5183    nir_builder b;
5184    nir_builder_init(&b, impl);
5185    b.cursor = nir_before_block(nir_start_block(impl));
5186
5187    nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
5188
5189    nir_foreach_shader_in_variable(var, tes) {
5190       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
5191          continue;
5192       const struct glsl_type *in_type = var->type;
5193       const struct glsl_type *out_type = var->type;
5194       char buf[1024];
5195       snprintf(buf, sizeof(buf), "%s_out", var->name);
5196       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
5197          const struct glsl_type *type = var->type;
5198          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
5199          out_type = glsl_array_type(type, vertices_per_patch, 0);
5200       }
5201
5202       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
5203       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
5204       out->data.location = in->data.location = var->data.location;
5205       out->data.location_frac = in->data.location_frac = var->data.location_frac;
5206
5207       /* gl_in[] receives values from equivalent built-in output
5208          variables written by the vertex shader (section 2.14.7).  Each array
5209          element of gl_in[] is a structure holding values for a specific vertex of
5210          the input patch.  The length of gl_in[] is equal to the
5211          implementation-dependent maximum patch size (gl_MaxPatchVertices).
5212          - ARB_tessellation_shader
5213        */
5214       /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
5215       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
5216       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
5217       copy_vars(&b, out_value, in_value);
5218    }
5219    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
5220    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
5221    gl_TessLevelInner->data.patch = 1;
5222    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
5223    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
5224    gl_TessLevelOuter->data.patch = 1;
5225
5226    create_gfx_pushconst(nir);
5227
5228    nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
5229                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
5230                                                     .base = 1, .range = 8);
5231    nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
5232                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
5233                                                     .base = 2, .range = 16);
5234
5235    for (unsigned i = 0; i < 2; i++) {
5236       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
5237       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
5238    }
5239    for (unsigned i = 0; i < 4; i++) {
5240       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
5241       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
5242    }
5243
5244    nir->info.tess.tcs_vertices_out = vertices_per_patch;
5245    nir_validate_shader(nir, "created");
5246
5247    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
5248    optimize_nir(nir, NULL);
5249    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5250    NIR_PASS_V(nir, nir_convert_from_ssa, true);
5251
5252    *nir_ret = nir;
5253    zink_shader_serialize_blob(nir, &ret->blob);
5254    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5255    ret->non_fs.is_generated = true;
5256    return ret;
5257 }
5258
5259 bool
5260 zink_shader_has_cubes(nir_shader *nir)
5261 {
5262    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
5263       const struct glsl_type *type = glsl_without_array(var->type);
5264       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
5265          return true;
5266    }
5267    return false;
5268 }
5269
5270 nir_shader *
5271 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
5272 {
5273    struct blob_reader blob_reader;
5274    blob_reader_init(&blob_reader, blob->data, blob->size);
5275    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
5276 }
5277
5278 nir_shader *
5279 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
5280 {
5281    return zink_shader_blob_deserialize(screen, &zs->blob);
5282 }
5283
5284 void
5285 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
5286 {
5287    blob_init(blob);
5288 #ifndef NDEBUG
5289    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
5290 #else
5291    bool strip = false;
5292 #endif
5293    nir_serialize(blob, nir, strip);
5294 }
5295
5296 void
5297 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
5298 {
5299    nir_shader *nir = zink_shader_deserialize(screen, zs);
5300    nir_print_shader(nir, fp);
5301    ralloc_free(nir);
5302 }