nir/zink: use sysvals in `nir_create_passthrough_gs`
src/gallium/drivers/zink/zink_compiler.c
/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "nir_xfb_info.h"
#include "nir/nir_draw_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "compiler/nir/nir_builtin_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"

bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);


static void
copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
   if (glsl_type_is_struct(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
         copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
      }
   } else if (glsl_type_is_array_or_matrix(dst->type)) {
      unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
      for (unsigned i = 0; i < count; i++) {
         copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
      }
   } else {
      nir_ssa_def *load = nir_load_deref(b, src);
      nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
   }
}

#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)

static void
create_gfx_pushconst(nir_shader *nir)
{
#define PUSHCONST_MEMBER(member_idx, field)                                                                     \
fields[member_idx].type =                                                                                       \
   glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);

   nir_variable *pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);

   pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                   glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
                                   "gfx_pushconst");
   pushconst->data.location = INT_MAX; //doesn't really matter

#undef PUSHCONST_MEMBER
}
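
/* For reference only: the block built above corresponds roughly to this GLSL
 * declaration, where every member is an array of uint sized from the matching
 * zink_gfx_push_constant field (the element counts below are illustrative,
 * inferred from how the fields are loaded elsewhere in this file, and are not
 * authoritative):
 *
 *    layout(push_constant) uniform gfx_pushconst {
 *       uint draw_mode_is_indexed[1];
 *       uint draw_id[1];
 *       uint framebuffer_is_layered[1];
 *       uint default_inner_level[2];
 *       uint default_outer_level[4];
 *       uint line_stipple_pattern[1];
 *       uint viewport_scale[2];
 *       uint line_width[1];
 *    };
 */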

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}
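
/* Sketch of the split above for a dvec4 attribute (pseudocode, not NIR):
 *
 *    before:  dvec4 attr;                  // driver_location N, two slots
 *             x = load(attr);
 *
 *    after:   dvec2 attr;                  // driver_location N
 *             dvec2 attr_hi;               // driver_location N + 1 (the clone)
 *             x = dvec4(load(attr).xy, load(attr_hi).xy);
 *
 * attr_hi is an illustrative name; the clone keeps the original name.
 */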

/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
     casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}
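
/* Example of the rewrite above: a u64vec2 input becomes a uvec4 variable,
 * and the original 64-bit value is reassembled from 32-bit pairs, roughly:
 *
 *    x = u64vec2(pack_64_2x32(load.xy), pack_64_2x32(load.zw));
 */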

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
   return progress;
}

static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
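
/* Net effect of the pass above, expressed as pseudo-GLSL: every use of
 * gl_BaseVertex is replaced by
 *
 *    (push.draw_mode_is_indexed == 1) ? gl_BaseVertex : 0
 *
 * so non-indexed draws observe a base vertex of zero, matching GL semantics.
 */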


static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}
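
/* Analogous to the basevertex lowering, but simpler: every gl_DrawID read
 * becomes a plain push-constant load of push.draw_id, with no select needed.
 */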

struct lower_gl_point_state {
   nir_variable *gl_pos_out;
   nir_variable *gl_point_size;
};

static bool
lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_gl_point_state *state = data;
   nir_ssa_def *vp_scale, *pos;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex &&
       intrin->intrinsic != nir_intrinsic_end_primitive &&
       intrin->intrinsic != nir_intrinsic_end_primitive_with_counter)
      return false;

   if (nir_intrinsic_stream_id(intrin) != 0)
      return false;

   /* this pass ends each emitted quad itself, so the original
    * end-primitive intrinsics can simply be dropped
    */
   if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
         intrin->intrinsic == nir_intrinsic_end_primitive) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   b->cursor = nir_before_instr(instr);

   // viewport-map endpoints
   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);

   // Load point info values
   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);

   // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
   w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
   // half_w_delta = w_delta / 2
   nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));

   // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
   h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
   // half_h_delta = h_delta / 2
   nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));

   nir_ssa_def *point_dir[4][2] = {
      { nir_imm_float(b, -1), nir_imm_float(b, -1) },
      { nir_imm_float(b, -1), nir_imm_float(b, 1) },
      { nir_imm_float(b, 1), nir_imm_float(b, -1) },
      { nir_imm_float(b, 1), nir_imm_float(b, 1) }
   };

   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);

   for (size_t i = 0; i < 4; i++) {
      pos = nir_vec4(b,
                     nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
                     nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
                     nir_channel(b, point_pos, 2),
                     nir_channel(b, point_pos, 3));

      nir_store_var(b, state->gl_pos_out, pos, 0xf);

      nir_emit_vertex(b);
   }

   nir_end_primitive(b);

   nir_instr_remove(&intrin->instr);

   return true;
}
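
/* Each input point is thus expanded into a 4-vertex triangle strip (a quad
 * centered on gl_Position). The offsets are computed in clip space: the
 * screen-space point size is divided by the viewport scale and multiplied
 * by gl_Position.w so the quad ends up the right size after the
 * perspective divide.
 */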

static bool
lower_gl_point_gs(nir_shader *shader)
{
   struct lower_gl_point_state state;
   nir_builder b;

   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   shader->info.gs.vertices_out *= 4;

   // find the gl_Position and gl_PointSize outputs
   state.gl_pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);
   state.gl_point_size =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_PSIZ);

   // if gl_Position or gl_PointSize isn't written, we have nothing to do
   if (!state.gl_pos_out || !state.gl_point_size)
      return false;

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                       nir_metadata_dominance, &state);
}

struct lower_pv_mode_state {
   nir_variable *varyings[VARYING_SLOT_MAX];
   nir_variable *pos_counter;
   nir_variable *out_pos_counter;
   nir_variable *ring_offset;
   unsigned ring_size;
   unsigned primitive_vert_count;
   unsigned prim;
};

static nir_ssa_def*
lower_pv_mode_gs_ring_index(nir_builder *b,
                            struct lower_pv_mode_state *state,
                            nir_ssa_def *index)
{
   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
   return nir_imod(b, nir_iadd(b, index, ring_offset),
                      nir_imm_int(b, state->ring_size));
}
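
/* i.e. ring_index = (index + ring_offset) % ring_size; e.g. with
 * ring_size == 4 and ring_offset == 2, logical indices 0,1,2,3 map to
 * physical slots 2,3,0,1.
 */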

static bool
lower_pv_mode_gs_store(nir_builder *b,
                       nir_intrinsic_instr *intrin,
                       struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      gl_varying_slot location = var->data.location;
      assert(state->varyings[location]);
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
      nir_store_array_var(b, state->varyings[location],
                             index, intrin->src[1].ssa,
                             nir_intrinsic_write_mask(intrin));
      nir_instr_remove(&intrin->instr);
      return true;
   }

   return false;
}

static void
lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                struct lower_pv_mode_state *state,
                                nir_ssa_def *current_vertex)
{
   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_ssa_def *three = nir_imm_int(b, 3);
   bool is_triangle = state->primitive_vert_count == 3;
   /* This shader will always see the last three vertices emitted by the user gs.
    * The following table is used to rotate primitives within a strip generated
    * by the user gs such that the last vertex becomes the first.
    *
    * [lines, tris][even/odd index][vertex mod 3]
    */
   static const unsigned vert_maps[2][2][3] = {
      {{1, 0, 0}, {1, 0, 0}},
      {{2, 0, 1}, {2, 1, 0}}
   };
   /* When the primitive supplied to the gs comes from a strip, the provoking
    * vertex is either the last or the second, depending on whether the triangle
    * is at an odd or even position within the strip.
    *
    * odd or even primitive within draw
    */
   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
   for (unsigned i = 0; i < state->primitive_vert_count; i++) {
      /* odd or even triangle within strip emitted by user GS
       * this is handled using the table
       */
      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
      unsigned offset_even = vert_maps[is_triangle][0][i];
      unsigned offset_odd = vert_maps[is_triangle][1][i];
      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                            offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
       * For odd primitives we rotate by 2, combined with the previous rotation this
       * means the second vertex becomes the last.
       */
      if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
        rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
                                            nir_isub(b, three,
                                                        odd_prim)),
                                            three);
      /* Triangles that come from fans are provided to the gs the same way as
       * odd triangles from a strip so always rotate by 2.
       */
      else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
        rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
                                three);
      rotated_i = nir_iadd(b, rotated_i, current_vertex);
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         if (state->varyings[location]) {
            nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
            nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location]), index);
            copy_vars(b, nir_build_deref_var(b, var), value);
         }
      }
      nir_emit_vertex(b);
   }
}
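
/* Worked example, assuming triangles (primitive_vert_count == 3): for an
 * even primitive the table row {2, 0, 1} replays the buffered vertices as
 * last, first, second, so the provoking (last) vertex comes out first while
 * the strip's even/odd rule preserves winding.
 */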

static bool
lower_pv_mode_gs_emit_vertex(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // increment pos_counter
   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_pv_mode_gs_end_primitive(nir_builder *b,
                               nir_intrinsic_instr *intrin,
                               struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_push_loop(b);
   {
      nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
      nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                                nir_imm_int(b, state->primitive_vert_count)));
      nir_jump(b, nir_jump_break);
      nir_pop_if(b, NULL);

      lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
      nir_end_primitive(b);

      nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
   }
   nir_pop_loop(b, NULL);
   /* Set the ring offset such that when position 0 is
    * read we get the last value written
    */
   nir_store_var(b, state->ring_offset, pos_counter, 1);
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
   nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_pv_mode_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_pv_mode_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_pv_mode_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_pv_mode_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static unsigned int
lower_pv_mode_vertices_for_prim(enum shader_prim prim)
{
   switch (prim) {
   case SHADER_PRIM_POINTS:
      return 1;
   case SHADER_PRIM_LINE_STRIP:
      return 2;
   case SHADER_PRIM_TRIANGLE_STRIP:
      return 3;
   default:
      unreachable("unsupported primitive for gs output");
   }
}

static bool
lower_pv_mode_gs(nir_shader *shader, unsigned prim)
{
   nir_builder b;
   struct lower_pv_mode_state state;
   memset(state.varyings, 0, sizeof(state.varyings));

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
   state.ring_size = shader->info.gs.vertices_out;

   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_primverts_%d", location);
      state.varyings[location] =
         nir_local_variable_create(entry,
                                   glsl_array_type(var->type,
                                                   state.ring_size,
                                                   false),
                                   name);
   }

   state.pos_counter = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__pos_counter");

   state.out_pos_counter = nir_local_variable_create(entry,
                                                     glsl_uint_type(),
                                                     "__out_pos_counter");

   state.ring_offset = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__ring_offset");

   state.prim = prim;

   // initialize pos_counter, out_pos_counter and ring_offset
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
                                   (state.primitive_vert_count - 1)) *
                                  state.primitive_vert_count;
   return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
                                       nir_metadata_dominance, &state);
}
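
/* vertices_out above is the de-stripped vertex count: a strip of N vertices
 * contains N - (verts_per_prim - 1) primitives, each re-emitted with
 * verts_per_prim vertices. E.g. a 5-vertex triangle strip holds 3 triangles
 * and is re-emitted as 9 vertices.
 */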

struct lower_line_stipple_state {
   nir_variable *pos_out;
   nir_variable *stipple_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *stipple_counter;
   bool line_rectangular;
};

static nir_ssa_def *
viewport_map(nir_builder *b, nir_ssa_def *vert,
             nir_ssa_def *scale)
{
   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
   nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
                                        w_recip);
   return nir_fmul(b, ndc_point, scale);
}
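
/* viewport_map computes screen-space xy from a clip-space vertex:
 *
 *    screen.xy = vert.xy / vert.w * scale
 *
 * where scale is the viewport scale pulled from the push constants.
 */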

static bool
lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_line_stipple_state *state = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   // viewport-map endpoints
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   prev = viewport_map(b, prev, vp_scale);
   curr = viewport_map(b, curr, vp_scale);

   // calculate length of line
   nir_ssa_def *len;
   if (state->line_rectangular)
      len = nir_fast_distance(b, prev, curr);
   else {
      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
   }
   // update stipple_counter
   nir_store_var(b, state->stipple_counter,
                    nir_fadd(b, nir_load_var(b, state->stipple_counter),
                                len), 1);
   nir_pop_if(b, NULL);
   // emit stipple out
   nir_copy_var(b, state->stipple_out, state->stipple_counter);
   nir_copy_var(b, state->prev_pos, state->pos_out);

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   return true;
}

static bool
lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
{
   nir_builder b;
   struct lower_line_stipple_state state;

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
                                           glsl_float_type(),
                                           "__stipple");
   state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.stipple_out->data.driver_location = shader->num_outputs++;
   state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");
   state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
                                               glsl_float_type(),
                                               "__stipple_counter");

   state.line_rectangular = line_rectangular;
   // initialize pos_counter and stipple_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);

   return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
                                       nir_metadata_dominance, &state);
}

static bool
lower_line_stipple_fs(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);

   // create stipple counter
   nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
                                               glsl_float_type(),
                                               "__stipple");
   stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   stipple->data.driver_location = shader->num_inputs++;
   stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
   shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);

   nir_variable *sample_mask_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      FRAG_RESULT_SAMPLE_MASK);
   if (!sample_mask_out) {
      sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
                                        glsl_uint_type(), "sample_mask");
      sample_mask_out->data.driver_location = shader->num_outputs++;
      sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
   }

   b.cursor = nir_after_cf_list(&entry->body);

   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                 .base = 1);
   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
   pattern = nir_iand_imm(&b, pattern, 0xffff);

   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
   nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_store_var(&b, v, sample_mask_in, 1);
   nir_store_var(&b, sample_mask, sample_mask_in, 1);
   nir_push_loop(&b);
   {
      nir_ssa_def *value = nir_load_var(&b, v);
      nir_ssa_def *index = nir_ufind_msb(&b, value);
      nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
      nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
      nir_store_var(&b, v, new_value, 1);
      nir_push_if(&b, nir_ieq_imm(&b, value, 0));
      nir_jump(&b, nir_jump_break);
      nir_pop_if(&b, NULL);

      nir_ssa_def *stipple_pos =
         nir_interp_deref_at_sample(&b, 1, 32,
            &nir_build_deref_var(&b, stipple)->dest.ssa, index);
      stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                                 nir_imm_float(&b, 16.0));
      stipple_pos = nir_f2i32(&b, stipple_pos);
      nir_ssa_def *bit =
         nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
      nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
      {
         nir_ssa_def *value = nir_load_var(&b, sample_mask);
         value = nir_ixor(&b, value, index_mask);
         nir_store_var(&b, sample_mask, value, 1);
      }
      nir_pop_if(&b, NULL);
   }
   nir_pop_loop(&b, NULL);
   nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);

   return true;
}
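
/* The loop above walks the set bits of gl_SampleMaskIn one at a time
 * (ufind_msb picks the highest remaining bit, ixor clears it), interpolates
 * the stipple coordinate at that sample, and clears the sample's bit in the
 * output mask whenever the corresponding pattern bit is 0, roughly:
 *
 *    bit = (pattern >> int(mod(stipple_pos / factor, 16.0))) & 1;
 */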

struct lower_line_smooth_state {
   nir_variable *pos_out;
   nir_variable *line_coord_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
                *varyings[VARYING_SLOT_MAX][4]; // location_frac
};

static bool
lower_line_smooth_gs_store(nir_builder *b,
                           nir_intrinsic_instr *intrin,
                           struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      // we take care of position elsewhere
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location != VARYING_SLOT_POS) {
         assert(state->varyings[location][location_frac]);
         assert(intrin->src[1].is_ssa);
         nir_store_var(b, state->varyings[location][location_frac],
                       intrin->src[1].ssa,
                       nir_intrinsic_write_mask(intrin));
         nir_instr_remove(&intrin->instr);
         return true;
      }
   }

   return false;
}

static bool
lower_line_smooth_gs_emit_vertex(nir_builder *b,
                                 nir_intrinsic_instr *intrin,
                                 struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);

   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
                                               .base = 1);
   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);

   const unsigned yx[2] = { 1, 0 };
   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
   nir_ssa_def *len = nir_fast_length(b, vec);
   nir_ssa_def *dir = nir_normalize(b, vec);
   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
   half_length = nir_fadd_imm(b, half_length, 0.5);

   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
   nir_ssa_def *tangent =
      nir_fmul(b,
               nir_fmul(b,
                        nir_swizzle(b, dir, yx, 2),
                        nir_imm_vec2(b, 1.0, -1.0)),
               vp_scale_rcp);
   tangent = nir_fmul(b, tangent, half_width);
   tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
   dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);

   nir_ssa_def *line_offsets[8] = {
      nir_fadd(b, tangent, nir_fneg(b, dir)),
      nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
      tangent,
      nir_fneg(b, tangent),
      tangent,
      nir_fneg(b, tangent),
      nir_fadd(b, tangent, dir),
      nir_fadd(b, nir_fneg(b, tangent), dir),
   };
   nir_ssa_def *line_coord =
      nir_vec4(b, half_width, half_width, half_length, half_length);
   nir_ssa_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,  -1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,  -1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   1,  1)),
   };

   /* emit first end-cap, and start line */
   for (int i = 0; i < 4; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->prev_varyings[location][location_frac])
            nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, prev, nir_fmul(b, line_offsets[i],
                             nir_channel(b, prev, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }

   /* finish line and emit last end-cap */
   for (int i = 4; i < 8; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac])
            nir_copy_var(b, var, state->varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, curr, nir_fmul(b, line_offsets[i],
                             nir_channel(b, curr, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }
   nir_end_primitive(b);

   nir_pop_if(b, NULL);

   nir_copy_var(b, state->prev_pos, state->pos_out);
   nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (state->varyings[location][location_frac])
         nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
   }

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(&intrin->instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
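
/* Each line segment is thus expanded into one 8-vertex triangle strip:
 * vertices 0-3 draw the first end-cap and open the body of the line, and
 * vertices 4-7 close the body and draw the second end-cap. The offsets are
 * multiplied by .w so they survive the perspective divide, and the
 * noperspective __line_coord varying carries the quad-local coordinates
 * used on the fragment side for coverage.
 */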

static bool
lower_line_smooth_gs_end_primitive(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // reset line counter
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_line_smooth_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_line_smooth_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_line_smooth_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_line_smooth_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static bool
lower_line_smooth_gs(nir_shader *shader)
{
   nir_builder b;
   struct lower_line_smooth_state state;

   memset(state.varyings, 0, sizeof(state.varyings));
   memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location == VARYING_SLOT_POS)
         continue;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                              var->type, name);

      snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
      state.prev_varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                              var->type, name);
   }

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.line_coord_out =
      nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
                          "__line_coord");
   state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.line_coord_out->data.driver_location = shader->num_outputs++;
   state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");

   // initialize pos_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;

   return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                       nir_metadata_dominance, &state);
}

static bool
lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
{
   int dummy;
   nir_builder b;

   nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
   if (lower_stipple) {
      stipple_counter = nir_variable_create(shader, nir_var_shader_in,
                                            glsl_float_type(),
                                            "__stipple");
      stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
      stipple_counter->data.driver_location = shader->num_inputs++;
      stipple_counter->data.location =
         MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
      shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);

      stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
                                            glsl_uint_type(),
                                            "stipple_pattern");

      // initialize stipple_pattern
      nir_function_impl *entry = nir_shader_get_entrypoint(shader);
      nir_builder_init(&b, entry);
      b.cursor = nir_before_cf_list(&entry->body);
      nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                   nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                   .base = 1);
      nir_store_var(&b, stipple_pattern, pattern, 1);
   }

   nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
   return true;
}

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}

static bool
lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
   if (alu_instr->op != nir_op_pack_64_2x32 &&
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
   b->cursor = nir_before_instr(&alu_instr->instr);
   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
   nir_ssa_def *dest;
   switch (alu_instr->op) {
   case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
      break;
   case nir_op_unpack_64_2x32:
      dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
      break;
   default:
      unreachable("Impossible opcode");
   }
   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
   nir_instr_remove(&alu_instr->instr);
   return true;
}

static bool
lower_64bit_pack(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
                                       nir_metadata_block_index | nir_metadata_dominance, NULL);
}
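
/* i.e. pack_64_2x32(v) -> pack_64_2x32_split(v.x, v.y) and
 * unpack_64_2x32(v) -> vec2(unpack_64_2x32_split_x(v),
 *                           unpack_64_2x32_split_y(v)).
 */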

nir_shader *
zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
                               const nir_shader *prev_stage)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                  options,
                                                  "filled quad gs");

   nir_shader *nir = b.shader;
   nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 4;
   nir->info.gs.vertices_out = 6;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
   memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
   if (prev_stage->xfb_info) {
      nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
   }

   nir_variable *in_vars[VARYING_SLOT_MAX];
   nir_variable *out_vars[VARYING_SLOT_MAX];
   unsigned num_vars = 0;

   /* Create input/output variables. */
   nir_foreach_shader_out_variable(var, prev_stage) {
      assert(!var->data.patch);

      char name[100];
      if (var->name)
         snprintf(name, sizeof(name), "in_%s", var->name);
      else
         snprintf(name, sizeof(name), "in_%d", var->data.driver_location);

      nir_variable *in = nir_variable_clone(var, nir);
      ralloc_free(in->name);
      in->name = ralloc_strdup(in, name);
      in->type = glsl_array_type(var->type, 4, false);
      in->data.mode = nir_var_shader_in;
      nir_shader_add_variable(nir, in);

      if (var->name)
         snprintf(name, sizeof(name), "out_%s", var->name);
      else
         snprintf(name, sizeof(name), "out_%d", var->data.driver_location);

      nir_variable *out = nir_variable_clone(var, nir);
      ralloc_free(out->name);
      out->name = ralloc_strdup(out, name);
      out->data.mode = nir_var_shader_out;
      nir_shader_add_variable(nir, out);

      in_vars[num_vars] = in;
      out_vars[num_vars++] = out;
   }

   int mapping_first[] = {0, 1, 2, 0, 2, 3};
   int mapping_last[] = {0, 1, 3, 1, 2, 3};
   nir_ssa_def *last_pv_vert_def = nir_load_provoking_last(&b);
   last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
   for (unsigned i = 0; i < 6; ++i) {
      /* pick the vertex ordering that matches the provoking-vertex mode */
      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                   nir_imm_int(&b, mapping_last[i]),
                                   nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
      for (unsigned j = 0; j < num_vars; ++j) {
         if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
            continue;
         }
         nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
         copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
      }
      nir_emit_vertex(&b, 0);
      if (i == 2)
        nir_end_primitive(&b, 0);
   }

   nir_end_primitive(&b, 0);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
   return nir;
}
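
/* The quad {0,1,2,3} is split into two triangles: with first-vertex
 * provoking, {0,1,2} and {0,2,3} keep vertex 0 as the provoking vertex of
 * both triangles; with last-vertex provoking, {0,1,3} and {1,2,3} keep
 * vertex 3 provoking instead.
 */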

static bool
lower_system_values_to_inlined_uniforms_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   int inlined_uniform_offset;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_flat_mask:
      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
      break;
   case nir_intrinsic_load_provoking_last:
      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
      break;
   default:
      return false;
   }

   b->cursor = nir_before_instr(&intrin->instr);
   nir_ssa_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
                                            nir_imm_int(b, inlined_uniform_offset),
                                            .align_mul = 4, .align_offset = 0,
                                            .range_base = 0, .range = ~0);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest_def);
   nir_instr_remove(instr);
   return true;
}

bool
zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_system_values_to_inlined_uniforms_instr,
                                       nir_metadata_dominance, NULL);
}
1305
1306 void
1307 zink_screen_init_compiler(struct zink_screen *screen)
1308 {
1309    static const struct nir_shader_compiler_options
1310    default_options = {
1311       .lower_ffma16 = true,
1312       .lower_ffma32 = true,
1313       .lower_ffma64 = true,
1314       .lower_scmp = true,
1315       .lower_fdph = true,
1316       .lower_flrp32 = true,
1317       .lower_fpow = true,
1318       .lower_fsat = true,
1319       .lower_extract_byte = true,
1320       .lower_extract_word = true,
1321       .lower_insert_byte = true,
1322       .lower_insert_word = true,
1323
1324       /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1325        * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1326        * ldexp, so we don't just always lower it in NIR).  Given that ldexp is
1327        * effectively unused (no instances in shader-db), it's not worth the
1328        * effort to do so.
1329        * */
1330       .lower_ldexp = true,
1331
1332       .lower_mul_high = true,
1333       .lower_rotate = true,
1334       .lower_uadd_carry = true,
1335       .lower_usub_borrow = true,
1336       .lower_uadd_sat = true,
1337       .lower_usub_sat = true,
1338       .lower_vector_cmp = true,
1339       .lower_int64_options = 0,
1340       .lower_doubles_options = 0,
1341       .lower_uniforms_to_ubo = true,
1342       .has_fsub = true,
1343       .has_isub = true,
1344       .has_txs = true,
1345       .lower_mul_2x32_64 = true,
1346       .support_16bit_alu = true, /* not quite what it sounds like */
1347       .max_unroll_iterations = 0,
1348    };
1349
1350    screen->nir_options = default_options;
1351
1352    if (!screen->info.feats.features.shaderInt64)
1353       screen->nir_options.lower_int64_options = ~0;
1354
1355    if (!screen->info.feats.features.shaderFloat64) {
1356       screen->nir_options.lower_doubles_options = ~0;
1357       screen->nir_options.lower_flrp64 = true;
1358       screen->nir_options.lower_ffma64 = true;
1359       /* soft fp64 function inlining will blow up loop bodies and effectively
1360        * stop Vulkan drivers from unrolling the loops.
1361        */
1362       screen->nir_options.max_unroll_iterations_fp64 = 32;
1363    }
1364
1365    /* The OpFRem and OpFMod instructions use cheap approximations of remainder,
1366     * and the error can be large due to the discontinuity in trunc() and floor().
1367     * This can produce mathematically unexpected results in some cases, such as
1368     * FMod(x,x) computing x rather than 0, and can also cause the result to have
1369     * a different sign than the infinitely precise result.
1370     *
1371     *    - Table 84. Precision of core SPIR-V Instructions
1372     *
1373     * For drivers that are known to have imprecise fmod for doubles, lower dmod.
1374     */
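        /* nir_lower_dmod expands a 64bit mod roughly as x - y * floor(x / y)
         * instead of relying on the driver's imprecise OpFMod
         */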
1375    if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
1376        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1377        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
1378       screen->nir_options.lower_doubles_options = nir_lower_dmod;
1379 }
1380
1381 const void *
1382 zink_get_compiler_options(struct pipe_screen *pscreen,
1383                           enum pipe_shader_ir ir,
1384                           gl_shader_stage shader)
1385 {
1386    assert(ir == PIPE_SHADER_IR_NIR);
1387    return &zink_screen(pscreen)->nir_options;
1388 }
1389
1390 struct nir_shader *
1391 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1392 {
1393    if (zink_debug & ZINK_DEBUG_TGSI) {
1394       fprintf(stderr, "TGSI shader:\n---8<---\n");
1395       tgsi_dump_to_file(tokens, 0, stderr);
1396       fprintf(stderr, "---8<---\n\n");
1397    }
1398
1399    return tgsi_to_nir(tokens, screen, false);
1400 }
1401
1402
1403 static bool
1404 dest_is_64bit(nir_dest *dest, void *state)
1405 {
1406    bool *lower = (bool *)state;
1407    if (dest && (nir_dest_bit_size(*dest) == 64)) {
1408       *lower = true;
1409       return false;
1410    }
1411    return true;
1412 }
1413
1414 static bool
1415 src_is_64bit(nir_src *src, void *state)
1416 {
1417    bool *lower = (bool *)state;
1418    if (src && (nir_src_bit_size(*src) == 64)) {
1419       *lower = true;
1420       return false;
1421    }
1422    return true;
1423 }
1424
1425 static bool
1426 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1427 {
1428    bool lower = false;
1429    /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
1430     * doesn't have const variants, so do the ugly const_cast here. */
1431    nir_instr *instr = (nir_instr *)const_instr;
1432
1433    nir_foreach_dest(instr, dest_is_64bit, &lower);
1434    if (lower)
1435       return true;
1436    nir_foreach_src(instr, src_is_64bit, &lower);
1437    return lower;
1438 }
1439
1440 static bool
1441 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1442 {
1443    nir_instr *instr = (nir_instr *)const_instr;
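        /* this filter is only invoked on ALU instructions
         * (nir_lower_alu_to_scalar checks the instruction type first),
         * so nir_instr_as_alu() needs no type check here
         */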
1444    nir_alu_instr *alu = nir_instr_as_alu(instr);
1445    switch (alu->op) {
1446    case nir_op_pack_64_2x32_split:
1447    case nir_op_pack_32_2x16_split:
1448    case nir_op_unpack_32_2x16_split_x:
1449    case nir_op_unpack_32_2x16_split_y:
1450    case nir_op_unpack_64_2x32_split_x:
1451    case nir_op_unpack_64_2x32_split_y:
1452       return true;
1453    default:
1454       break;
1455    }
1456    return false;
1457 }
1458
1459
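     /* per-bit-size BO variables, indexed as bit_size >> 4:
      * 16bit -> 1, 32bit -> 2, 64bit -> 4 (slot 0 covers 8bit access)
      */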
1460 struct bo_vars {
1461    nir_variable *uniforms[5];
1462    nir_variable *ubo[5];
1463    nir_variable *ssbo[5];
1464    uint32_t first_ubo;
1465    uint32_t first_ssbo;
1466 };
1467
1468 static struct bo_vars
1469 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1470 {
1471    struct bo_vars bo;
1472    memset(&bo, 0, sizeof(bo));
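        /* ubo0 is the uniform_0 block: mask off bit 0, then use 1-based
         * ffs() - 2 to get a 0-based index for the first "real" UBO
         */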
1473    if (zs->ubos_used)
1474       bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1475    assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1476    if (zs->ssbos_used)
1477       bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1478    assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1479    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1480       unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1481       if (var->data.mode == nir_var_mem_ssbo) {
1482          assert(!bo.ssbo[idx]);
1483          bo.ssbo[idx] = var;
1484       } else {
1485          if (var->data.driver_location) {
1486             assert(!bo.ubo[idx]);
1487             bo.ubo[idx] = var;
1488          } else {
1489             assert(!bo.uniforms[idx]);
1490             bo.uniforms[idx] = var;
1491          }
1492       }
1493    }
1494    return bo;
1495 }
1496
1497 static bool
1498 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1499 {
1500    struct bo_vars *bo = data;
1501    if (instr->type != nir_instr_type_intrinsic)
1502       return false;
1503    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1504    nir_variable *var = NULL;
1505    nir_ssa_def *offset = NULL;
1506    bool is_load = true;
1507    b->cursor = nir_before_instr(instr);
1508
1509    switch (intr->intrinsic) {
1510    case nir_intrinsic_store_ssbo:
1511       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1512       offset = intr->src[2].ssa;
1513       is_load = false;
1514       break;
1515    case nir_intrinsic_load_ssbo:
1516       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1517       offset = intr->src[1].ssa;
1518       break;
1519    case nir_intrinsic_load_ubo:
1520       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1521          var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
1522       else
1523          var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
1524       offset = intr->src[1].ssa;
1525       break;
1526    default:
1527       return false;
1528    }
1529    nir_src offset_src = nir_src_for_ssa(offset);
1530    if (!nir_src_is_const(offset_src))
1531       return false;
1532
1533    unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1534    const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1535    unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1536    bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1537    if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1538       return false;
1539
1540    unsigned rewrites = 0;
1541    nir_ssa_def *result[2];
1542    for (unsigned i = 0; i < intr->num_components; i++) {
1543       if (offset_bytes + i >= size) {
1544          rewrites++;
1545          if (is_load)
1546             result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
1547       }
1548    }
1549    assert(rewrites == intr->num_components);
1550    if (is_load) {
1551       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1552       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1553    }
1554    nir_instr_remove(instr);
1555    return true;
1556 }
1557
1558 static bool
1559 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1560 {
1561    struct bo_vars bo = get_bo_vars(zs, shader);
1562    return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1563 }
1564
1565 static void
1566 optimize_nir(struct nir_shader *s, struct zink_shader *zs)
1567 {
1568    bool progress;
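        /* run the whole pipeline to a fixed point, since each pass can expose
         * new opportunities for the others
         */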
1569    do {
1570       progress = false;
1571       if (s->options->lower_int64_options)
1572          NIR_PASS_V(s, nir_lower_int64);
1573       if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1574          NIR_PASS_V(s, lower_64bit_pack);
1575       NIR_PASS_V(s, nir_lower_vars_to_ssa);
1576       NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1577       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1578       NIR_PASS(progress, s, nir_copy_prop);
1579       NIR_PASS(progress, s, nir_opt_remove_phis);
1580       if (s->options->lower_int64_options) {
1581          NIR_PASS(progress, s, nir_lower_64bit_phis);
1582          NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1583       }
1584       NIR_PASS(progress, s, nir_opt_dce);
1585       NIR_PASS(progress, s, nir_opt_dead_cf);
1586       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1587       NIR_PASS(progress, s, nir_opt_cse);
1588       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1589       NIR_PASS(progress, s, nir_opt_algebraic);
1590       NIR_PASS(progress, s, nir_opt_constant_folding);
1591       NIR_PASS(progress, s, nir_opt_undef);
1592       NIR_PASS(progress, s, zink_nir_lower_b2b);
1593       if (zs)
1594          NIR_PASS(progress, s, bound_bo_access, zs);
1595    } while (progress);
1596
1597    do {
1598       progress = false;
1599       NIR_PASS(progress, s, nir_opt_algebraic_late);
1600       if (progress) {
1601          NIR_PASS_V(s, nir_copy_prop);
1602          NIR_PASS_V(s, nir_opt_dce);
1603          NIR_PASS_V(s, nir_opt_cse);
1604       }
1605    } while (progress);
1606 }
1607
1608 /* - copy the lowered fbfetch variable
1609  * - set the new one up as an input attachment at descriptor set 0, binding 6
1610  * - load it as an image
1611  * - overwrite the previous load
1612  */
1613 static bool
1614 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1615 {
1616    bool ms = data != NULL;
1617    if (instr->type != nir_instr_type_intrinsic)
1618       return false;
1619    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1620    if (intr->intrinsic != nir_intrinsic_load_deref)
1621       return false;
1622    nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
1623    if (!var->data.fb_fetch_output)
1624       return false;
1625    b->cursor = nir_after_instr(instr);
1626    nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1627    /* If Dim is SubpassData, ... Image Format must be Unknown
1628     * - SPIRV OpTypeImage specification
1629     */
1630    fbfetch->data.image.format = 0;
1631    fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1632    fbfetch->data.mode = nir_var_uniform;
1633    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1635    fbfetch->data.sample = ms;
1636    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1637    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1638    nir_shader_add_variable(b->shader, fbfetch);
1639    nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
1640    nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
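        /* subpass input loads are implicitly relative to the current fragment,
         * so a zero coordinate reads the pixel being shaded
         */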
1641    nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1642    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1643    return true;
1644 }
1645
1646 static bool
1647 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1648 {
1649    nir_foreach_shader_out_variable(var, shader) {
1650       if (var->data.fb_fetch_output) {
1651          *fbfetch = var;
1652          break;
1653       }
1654    }
1655    assert(*fbfetch);
1656    if (!*fbfetch)
1657       return false;
1658    return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1659 }
1660
1661 /*
1662  * Add a check for out of bounds LOD for every texel fetch op
1663  * It boils down to:
1664  * - if (lod < query_levels(tex))
1665  * -    res = txf(tex)
1666  * - else
1667  * -    res = (0, 0, 0, 1)
1668  */
1669 static bool
1670 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1671 {
1672    if (in->type != nir_instr_type_tex)
1673       return false;
1674    nir_tex_instr *txf = nir_instr_as_tex(in);
1675    if (txf->op != nir_texop_txf)
1676       return false;
1677
1678    b->cursor = nir_before_instr(in);
1679    int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1680    assert(lod_idx >= 0);
1681    nir_src lod_src = txf->src[lod_idx].src;
1682    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1683       return false;
1684
1685    assert(lod_src.is_ssa);
1686    nir_ssa_def *lod = lod_src.ssa;
1687
1688    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1689    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1690    nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1691                                                 !!(offset_idx >= 0) + !!(handle_idx >= 0));
1692    levels->op = nir_texop_query_levels;
1693    levels->texture_index = txf->texture_index;
1694    levels->dest_type = nir_type_int | lod->bit_size;
1695    if (offset_idx >= 0) {
1696       levels->src[0].src_type = nir_tex_src_texture_offset;
1697       nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
1698    }
1699    if (handle_idx >= 0) {
1700       levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
1701       nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
1702    }
1703    nir_ssa_dest_init(&levels->instr, &levels->dest,
1704                      nir_tex_instr_dest_size(levels), 32, NULL);
1705    nir_builder_instr_insert(b, &levels->instr);
1706
1707    nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
1708    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1709    nir_builder_instr_insert(b, &new_txf->instr);
1710
1711    nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1712    nir_const_value oob_values[4] = {0};
1713    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1714    oob_values[3] = (txf->dest_type & nir_type_float) ?
1715                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1716    nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1717
1718    nir_pop_if(b, lod_oob_else);
1719    nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);
1720
1721    nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
1722    nir_instr_remove_v(in);
1723    return true;
1724 }
1725
1726 /* This pass works around the lack of out-of-bounds LOD robustness
1727  * for texel fetch ops in VK_EXT_image_robustness.
1728  */
1729 static bool
1730 lower_txf_lod_robustness(nir_shader *shader)
1731 {
1732    return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1733 }
1734
1735 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1736 static bool
1737 check_psiz(struct nir_shader *s)
1738 {
1739    bool have_psiz = false;
1740    nir_foreach_shader_out_variable(var, s) {
1741       if (var->data.location == VARYING_SLOT_PSIZ) {
1742          /* genuine PSIZ outputs will have this set */
1743          have_psiz |= !!var->data.explicit_location;
1744       }
1745    }
1746    return have_psiz;
1747 }
1748
1749 static nir_variable *
1750 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
1751 {
1752    assert((int)location >= 0);
1753
1754    unsigned found = 0;
1755    if (!location_frac && location != VARYING_SLOT_PSIZ) {
1756       nir_foreach_shader_out_variable(var, nir) {
1757          if (var->data.location == location)
1758             found++;
1759       }
1760    }
1761    if (found) {
1762       /* one or more variables have this location: take the one covering the most slots */
1763       nir_variable *out = NULL;
1764       unsigned slots = 0;
1765       nir_foreach_shader_out_variable(var, nir) {
1766          if (var->data.location == location) {
1767             unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
1768             if (count_slots > slots) {
1769                slots = count_slots;
1770                out = var;
1771             }
1772          }
1773       }
1774       return out;
1775    } else {
1776       /* nothing was counted above (location_frac set, PSIZ, or no exact match): match per-component */
1777       nir_foreach_shader_out_variable(var, nir) {
1778          if (var->data.location == location &&
1779              (var->data.location_frac == location_frac ||
1780               (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
1781             if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
1782                return var;
1783          }
1784       }
1785    }
1786    return NULL;
1787 }
1788
1789 static bool
1790 is_inlined(const bool *inlined, const struct pipe_stream_output *output)
1791 {
1792    for (unsigned i = 0; i < output->num_components; i++)
1793       if (!inlined[output->start_component + i])
1794          return false;
1795    return true;
1796 }
1797
1798 static void
1799 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1800 {
1801    uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1802    if (last_output < VARYING_SLOT_VAR0)
1803       last_output = VARYING_SLOT_VAR0;
1804    else
1805       last_output++;
1806    /* this should get fixed up by slot remapping */
1807    psiz->data.location = last_output;
1808 }
1809
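     /* e.g. a dvec3 covers two vec4 slots: slot_components = 3 * 2 = 6, so
      * slot 0 selects mask 0xf -> vec4 and slot 1 selects mask 0x30 -> vec2
      */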
1810 static const struct glsl_type *
1811 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1812 {
1813    /* could be dvec/dmat/mat: each member is the same */
1814    const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1815    /* determine size of each member type */
1816    unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1817    /* normalize slot idx to current type's size */
1818    slot %= slot_count;
1819    unsigned slot_components = glsl_get_components(plain);
1820    if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1821       slot_components *= 2;
1822    /* create a vec4 mask of the selected slot's components out of all the components */
1823    uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1824    /* return a vecN of the selected components */
1825    slot_components = util_bitcount(mask);
1826    return glsl_vec_type(slot_components);
1827 }
1828
1829 static const struct glsl_type *
1830 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1831 {
1832    const struct glsl_type *type = slot_type;
1833    unsigned slot_count = 0;
1834    unsigned cur_slot = 0;
1835    /* iterate over all the members in the struct, stopping once the slot idx is reached */
1836    for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1837       /* use array type for slot counting but return array member type for unroll */
1838       const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1839       type = glsl_without_array(arraytype);
1840       slot_count = glsl_count_vec4_slots(arraytype, false, false);
1841    }
1842    *slot_idx -= (cur_slot - slot_count);
1843    if (!glsl_type_is_struct_or_ifc(type))
1844       /* this is a fully unrolled struct: find the number of vec components to output */
1845       type = clamp_slot_type(type, *slot_idx);
1846    return type;
1847 }
1848
1849 static unsigned
1850 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1851 {
1852    assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1853    const struct glsl_type *orig_type = var->type;
1854    const struct glsl_type *type = glsl_without_array(var->type);
1855    unsigned slot_idx = slot - so_slot;
1856    if (type != orig_type)
1857       slot_idx %= glsl_count_vec4_slots(type, false, false);
1858    /* need to find the vec4 that's being exported by this slot */
1859    while (glsl_type_is_struct_or_ifc(type))
1860       type = unroll_struct_type(type, &slot_idx);
1861
1862    /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1863    unsigned num_components = glsl_get_components(glsl_without_array(type));
1864    /* special handling: clip/cull distance are arrays with vector semantics */
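        /* e.g. float gl_ClipDistance[6] exports 4 components in its first slot
         * and 6 % 4 = 2 components in its second
         */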
1865    if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1866       num_components = glsl_array_size(type);
1867       if (slot_idx)
1868          /* this is the second vec4 */
1869          num_components %= 4;
1870       else
1871          /* this is the first vec4 */
1872          num_components = MIN2(num_components, 4);
1873    }
1874    assert(num_components);
1875    /* gallium handles xfb in terms of 32bit units */
1876    if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1877       num_components *= 2;
1878    return num_components;
1879 }
1880
1881 static const struct pipe_stream_output *
1882 find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
1883 {
1884    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1885       const struct pipe_stream_output *packed_output = &so_info->output[i];
1886       if (reverse_map[packed_output->register_index] == slot)
1887          return packed_output;
1888    }
1889    return NULL;
1890 }
1891
1892 static void
1893 update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
1894                uint64_t outputs_written, bool have_psiz)
1895 {
1896    uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
1897    unsigned slot = 0;
1898    /* semi-copied from iris */
1899    while (outputs_written) {
1900       int bit = u_bit_scan64(&outputs_written);
1901       /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
1902       if (bit == VARYING_SLOT_PSIZ && !have_psiz)
1903          continue;
1904       reverse_map[slot++] = bit;
1905    }
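        /* e.g. a shader writing only POS and VAR0 yields
         * reverse_map = { VARYING_SLOT_POS, VARYING_SLOT_VAR0 }, mapping
         * gallium's compacted register indices back to varying slots
         */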
1906
1907    bool have_fake_psiz = false;
1908    nir_foreach_shader_out_variable(var, nir) {
1909       if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
1910          have_fake_psiz = true;
1911    }
1912
1913    bool inlined[VARYING_SLOT_MAX][4] = {0};
1914    uint64_t packed = 0;
1915    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1916    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1917    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1918    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1919    nir_variable *psiz = NULL;
1920    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1921       const struct pipe_stream_output *output = &so_info->output[i];
1922       unsigned slot = reverse_map[output->register_index];
1923       /* always set stride to be used during draw */
1924       zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
1925       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1926          nir_variable *var = NULL;
1927          unsigned so_slot;
1928          while (!var)
1929             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1930          if (var->data.location == VARYING_SLOT_PSIZ)
1931             psiz = var;
1932          so_slot = slot + 1;
1933          slot = reverse_map[output->register_index];
1934          if (var->data.explicit_xfb_buffer) {
1935             /* handle dvec3 where gallium splits streamout over 2 registers */
1936             for (unsigned j = 0; j < output->num_components; j++)
1937                inlined[slot][output->start_component + j] = true;
1938          }
1939          if (is_inlined(inlined[slot], output))
1940             continue;
1941          bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
1942          unsigned num_components = get_slot_components(var, slot, so_slot);
1943          /* if this is the entire variable, try to blast it out during the initial declaration
1944           * structs must be handled later to ensure accurate analysis
1945           */
1946          if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
1947             var->data.explicit_xfb_buffer = 1;
1948             var->data.xfb.buffer = output->output_buffer;
1949             var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1950             var->data.offset = output->dst_offset * 4;
1951             var->data.stream = output->stream;
1952             for (unsigned j = 0; j < output->num_components; j++)
1953                inlined[slot][output->start_component + j] = true;
1954          } else {
1955             /* otherwise store some metadata for later */
1956             packed |= BITFIELD64_BIT(slot);
1957             packed_components[slot] += output->num_components;
1958             packed_streams[slot] |= BITFIELD_BIT(output->stream);
1959             packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
1960             for (unsigned j = 0; j < output->num_components; j++)
1961                packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
1962          }
1963       }
1964    }
1965
1966    /* if this was flagged as a packed output before, and if all the components are
1967     * being output with the same stream on the same buffer with increasing offsets, this entire variable
1968     * can be consolidated into a single output to conserve locations
1969     */
1970    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1971       const struct pipe_stream_output *output = &so_info->output[i];
1972       unsigned slot = reverse_map[output->register_index];
1973       if (is_inlined(inlined[slot], output))
1974          continue;
1975       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1976          nir_variable *var = NULL;
1977          while (!var)
1978             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1979          /* this is a lowered 64bit variable that can't be exported due to packing */
1980          if (var->data.is_xfb)
1981             goto out;
1982
1983          unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
1984          /* for each variable, iterate over all the variable's slots and inline the outputs */
1985          for (unsigned j = 0; j < num_slots; j++) {
1986             slot = var->data.location + j;
1987             const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
1988             if (!packed_output)
1989                goto out;
1990
1991             /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1992             if (!(packed & BITFIELD64_BIT(slot)) ||
1993                 util_bitcount(packed_streams[slot]) != 1 ||
1994                 util_bitcount(packed_buffers[slot]) != 1)
1995                goto out;
1996
1997             /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1998             unsigned num_components = get_slot_components(var, slot, var->data.location);
1999             if (num_components != packed_components[slot])
2000                goto out;
2001
2002             /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
2003             uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
2004             for (unsigned k = 1; k < num_components; k++) {
2005                /* if the offsets are not incrementing as expected, skip consolidation */
2006                if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
2007                   goto out;
2008                prev_offset = packed_offsets[packed_output->register_index][k];
2009             }
2010          }
2011          /* this output can be consolidated: blast out all the data inlined */
2012          var->data.explicit_xfb_buffer = 1;
2013          var->data.xfb.buffer = output->output_buffer;
2014          var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
2015          var->data.offset = output->dst_offset * 4;
2016          var->data.stream = output->stream;
2017          /* GLSL specifies that interface blocks are split per-buffer in XFB */
2018          if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
2019             zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
2020          /* mark all slot components inlined to skip subsequent loop iterations */
2021          for (unsigned j = 0; j < num_slots; j++) {
2022             slot = var->data.location + j;
2023             for (unsigned k = 0; k < packed_components[slot]; k++)
2024                inlined[slot][k] = true;
2025             packed &= ~BITFIELD64_BIT(slot);
2026          }
2027          continue;
2028       }
2029 out:
2030       /* these are packed/explicit varyings which can't be exported with normal output */
2031       zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
2032       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
2033       zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
2034    }
2035    zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
2036    /* a fake psiz must not land in the builtin slot: move it to an unused generic location instead */
2037    if (have_fake_psiz && psiz)
2038       update_psiz_location(nir, psiz);
2039 }
2040
2041 struct decompose_state {
2042   nir_variable **split;
2043   bool needs_w;
2044 };
2045
2046 static bool
2047 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2048 {
2049    struct decompose_state *state = data;
2050    nir_variable **split = state->split;
2051    if (instr->type != nir_instr_type_intrinsic)
2052       return false;
2053    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2054    if (intr->intrinsic != nir_intrinsic_load_deref)
2055       return false;
2056    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2057    nir_variable *var = nir_deref_instr_get_variable(deref);
2058    if (var != split[0])
2059       return false;
2060    unsigned num_components = glsl_get_vector_elements(split[0]->type);
2061    b->cursor = nir_after_instr(instr);
2062    nir_ssa_def *loads[4];
2063    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2064       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2065    if (state->needs_w) {
2066       /* oob-load the w component to get the correct value for int/float */
2067       loads[3] = nir_channel(b, loads[0], 3);
2068       loads[0] = nir_channel(b, loads[0], 0);
2069    }
2070    nir_ssa_def *new_load = nir_vec(b, loads, num_components);
2071    nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
2072    nir_instr_remove_v(instr);
2073    return true;
2074 }
2075
2076 static bool
2077 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2078 {
2079    uint32_t bits = 0;
2080    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2081       bits |= BITFIELD_BIT(var->data.driver_location);
2082    bits = ~bits;
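        /* "bits" now has a set bit for every driver_location not used by an
         * input, i.e. the free slots the split components can claim below
         */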
2083    u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2084       nir_variable *split[5];
2085       struct decompose_state state;
2086       state.split = split;
2087       nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2088       assert(var);
2089       split[0] = var;
2090       bits |= BITFIELD_BIT(var->data.driver_location);
2091       const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2092       unsigned num_components = glsl_get_vector_elements(var->type);
2093       state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2094       for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2095          split[i+1] = nir_variable_clone(var, nir);
2096          split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2097          if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2098             split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2099          else
2100             split[i+1]->type = new_type;
2101          split[i+1]->data.driver_location = ffs(bits) - 1;
2102          bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2103          nir_shader_add_variable(nir, split[i+1]);
2104       }
2105       var->data.mode = nir_var_shader_temp;
2106       nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2107    }
2108    nir_fixup_deref_modes(nir);
2109    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2110    optimize_nir(nir, NULL);
2111    return true;
2112 }
2113
2114 static bool
2115 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2116 {
2117    struct zink_screen *screen = data;
2118    const bool has_int64 = screen->info.feats.features.shaderInt64;
2119    if (instr->type != nir_instr_type_intrinsic)
2120       return false;
2121    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2122    b->cursor = nir_before_instr(instr);
2123    switch (intr->intrinsic) {
2124    case nir_intrinsic_ssbo_atomic_fadd:
2125    case nir_intrinsic_ssbo_atomic_add:
2126    case nir_intrinsic_ssbo_atomic_umin:
2127    case nir_intrinsic_ssbo_atomic_imin:
2128    case nir_intrinsic_ssbo_atomic_umax:
2129    case nir_intrinsic_ssbo_atomic_imax:
2130    case nir_intrinsic_ssbo_atomic_and:
2131    case nir_intrinsic_ssbo_atomic_or:
2132    case nir_intrinsic_ssbo_atomic_xor:
2133    case nir_intrinsic_ssbo_atomic_exchange:
2134    case nir_intrinsic_ssbo_atomic_comp_swap: {
2135       /* convert offset to uintN_t[idx] */
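           /* e.g. a byte offset of 24 on a 32bit atomic becomes index 6 */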
2136       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
2137       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2138       return true;
2139    }
2140    case nir_intrinsic_load_ssbo:
2141    case nir_intrinsic_load_ubo: {
2142       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2143       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2144                         nir_src_is_const(intr->src[0]) &&
2145                         nir_src_as_uint(intr->src[0]) == 0 &&
2146                         nir_dest_bit_size(intr->dest) == 64 &&
2147                         nir_intrinsic_align_offset(intr) % 8 != 0;
2148       force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2149       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2150       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2151       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2152       if (force_2x32) {
2153          /* this is always scalarized */
2154          assert(intr->dest.ssa.num_components == 1);
2155          /* rewrite as 2x32 */
2156          nir_ssa_def *load[2];
2157          for (unsigned i = 0; i < 2; i++) {
2158             if (intr->intrinsic == nir_intrinsic_load_ssbo)
2159                load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2160             else
2161                load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2162             nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2163          }
2164          /* cast back to 64bit */
2165          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2166          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2167          nir_instr_remove(instr);
2168       }
2169       return true;
2170    }
2171    case nir_intrinsic_load_shared:
2172       b->cursor = nir_before_instr(instr);
2173       bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2174       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2175       nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
2176       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2177       if (force_2x32) {
2178          /* this is always scalarized */
2179          assert(intr->dest.ssa.num_components == 1);
2180          /* rewrite as 2x32 */
2181          nir_ssa_def *load[2];
2182          for (unsigned i = 0; i < 2; i++)
2183             load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2184          /* cast back to 64bit */
2185          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2186          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2187          nir_instr_remove(instr);
2188          return true;
2189       }
2190       break;
2191    case nir_intrinsic_store_ssbo: {
2192       b->cursor = nir_before_instr(instr);
2193       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2194       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2195       nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
2196       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2197       if (force_2x32) {
2198          /* this is always scalarized */
2199          assert(intr->src[0].ssa->num_components == 1);
2200          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2201          for (unsigned i = 0; i < 2; i++)
2202             nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2203          nir_instr_remove(instr);
2204       }
2205       return true;
2206    }
2207    case nir_intrinsic_store_shared: {
2208       b->cursor = nir_before_instr(instr);
2209       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2210       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2211       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2212       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2213       if (force_2x32) {
2214          /* this is always scalarized */
2215          assert(intr->src[0].ssa->num_components == 1);
2216          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2217          for (unsigned i = 0; i < 2; i++)
2218             nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2219          nir_instr_remove(instr);
2220       }
2221       return true;
2222    }
2223    default:
2224       break;
2225    }
2226    return false;
2227 }
2228
2229 static bool
2230 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2231 {
2232    return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2233 }
2234
2235 static nir_variable *
2236 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2237 {
2238    nir_variable *var, **ptr;
2239    unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2240
2241    if (ssbo)
2242       ptr = &bo->ssbo[bit_size >> 4];
2243    else {
2244       if (!idx) {
2245          ptr = &bo->uniforms[bit_size >> 4];
2246       } else
2247          ptr = &bo->ubo[bit_size >> 4];
2248    }
2249    var = *ptr;
2250    if (!var) {
2251       if (ssbo)
2252          var = bo->ssbo[32 >> 4];
2253       else {
2254          if (!idx)
2255             var = bo->uniforms[32 >> 4];
2256          else
2257             var = bo->ubo[32 >> 4];
2258       }
2259       var = nir_variable_clone(var, shader);
2260       if (ssbo)
2261          var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2262       else
2263          var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2264       *ptr = var;
2265       nir_shader_add_variable(shader, var);
2266
2267       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2268       fields[0].name = ralloc_strdup(shader, "base");
2269       fields[1].name = ralloc_strdup(shader, "unsized");
2270       unsigned array_size = glsl_get_length(var->type);
2271       const struct glsl_type *bare_type = glsl_without_array(var->type);
2272       const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2273       unsigned length = glsl_get_length(array_type);
2274       const struct glsl_type *type;
2275       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
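           /* rescale the base array to span the same number of bytes as the
            * 32bit variant it was cloned from,
            * e.g. uint[64] -> uint64_t[32] or uint16_t[128]
            */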
2276       if (bit_size > 32) {
2277          assert(bit_size == 64);
2278          type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2279       } else {
2280          type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2281       }
2282       fields[0].type = type;
2283       fields[1].type = unsized;
2284       var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2285       var->data.driver_location = idx;
2286    }
2287    return var;
2288 }
2289
2290 static void
2291 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2292 {
2293    nir_intrinsic_op op;
2294    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2295    switch (intr->intrinsic) {
2296    case nir_intrinsic_ssbo_atomic_fadd:
2297       op = nir_intrinsic_deref_atomic_fadd;
2298       break;
2299    case nir_intrinsic_ssbo_atomic_fmin:
2300       op = nir_intrinsic_deref_atomic_fmin;
2301       break;
2302    case nir_intrinsic_ssbo_atomic_fmax:
2303       op = nir_intrinsic_deref_atomic_fmax;
2304       break;
2305    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2306       op = nir_intrinsic_deref_atomic_fcomp_swap;
2307       break;
2308    case nir_intrinsic_ssbo_atomic_add:
2309       op = nir_intrinsic_deref_atomic_add;
2310       break;
2311    case nir_intrinsic_ssbo_atomic_umin:
2312       op = nir_intrinsic_deref_atomic_umin;
2313       break;
2314    case nir_intrinsic_ssbo_atomic_imin:
2315       op = nir_intrinsic_deref_atomic_imin;
2316       break;
2317    case nir_intrinsic_ssbo_atomic_umax:
2318       op = nir_intrinsic_deref_atomic_umax;
2319       break;
2320    case nir_intrinsic_ssbo_atomic_imax:
2321       op = nir_intrinsic_deref_atomic_imax;
2322       break;
2323    case nir_intrinsic_ssbo_atomic_and:
2324       op = nir_intrinsic_deref_atomic_and;
2325       break;
2326    case nir_intrinsic_ssbo_atomic_or:
2327       op = nir_intrinsic_deref_atomic_or;
2328       break;
2329    case nir_intrinsic_ssbo_atomic_xor:
2330       op = nir_intrinsic_deref_atomic_xor;
2331       break;
2332    case nir_intrinsic_ssbo_atomic_exchange:
2333       op = nir_intrinsic_deref_atomic_exchange;
2334       break;
2335    case nir_intrinsic_ssbo_atomic_comp_swap:
2336       op = nir_intrinsic_deref_atomic_comp_swap;
2337       break;
2338    default:
2339       unreachable("unknown intrinsic");
2340    }
2341    nir_ssa_def *offset = intr->src[1].ssa;
2342    nir_src *src = &intr->src[0];
2343    nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2344    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2345    nir_ssa_def *idx = src->ssa;
2346    if (bo->first_ssbo)
2347       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2348    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2349    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2350
2351    /* generate new atomic deref ops for every component */
2352    nir_ssa_def *result[4];
2353    unsigned num_components = nir_dest_num_components(intr->dest);
2354    for (unsigned i = 0; i < num_components; i++) {
2355       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2356       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2357       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
2358       new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2359       /* deref ops have no offset src, so copy the srcs after it */
2360       for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
2361          nir_src_copy(&new_instr->src[j - 1], &intr->src[j], &new_instr->instr);
2362       nir_builder_instr_insert(b, &new_instr->instr);
2363
2364       result[i] = &new_instr->dest.ssa;
2365       offset = nir_iadd_imm(b, offset, 1);
2366    }
2367
2368    nir_ssa_def *load = nir_vec(b, result, num_components);
2369    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2370    nir_instr_remove(instr);
2371 }
2372
2373 static bool
2374 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2375 {
2376    struct bo_vars *bo = data;
2377    if (instr->type != nir_instr_type_intrinsic)
2378       return false;
2379    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2380    nir_variable *var = NULL;
2381    nir_ssa_def *offset = NULL;
2382    bool is_load = true;
2383    b->cursor = nir_before_instr(instr);
2384    nir_src *src;
2385    bool ssbo = true;
2386    switch (intr->intrinsic) {
2387    case nir_intrinsic_ssbo_atomic_fadd:
2388    case nir_intrinsic_ssbo_atomic_fmin:
2389    case nir_intrinsic_ssbo_atomic_fmax:
2390    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2391    case nir_intrinsic_ssbo_atomic_add:
2392    case nir_intrinsic_ssbo_atomic_umin:
2393    case nir_intrinsic_ssbo_atomic_imin:
2394    case nir_intrinsic_ssbo_atomic_umax:
2395    case nir_intrinsic_ssbo_atomic_imax:
2396    case nir_intrinsic_ssbo_atomic_and:
2397    case nir_intrinsic_ssbo_atomic_or:
2398    case nir_intrinsic_ssbo_atomic_xor:
2399    case nir_intrinsic_ssbo_atomic_exchange:
2400    case nir_intrinsic_ssbo_atomic_comp_swap:
2401       rewrite_atomic_ssbo_instr(b, instr, bo);
2402       return true;
2403    case nir_intrinsic_store_ssbo:
2404       src = &intr->src[1];
2405       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2406       offset = intr->src[2].ssa;
2407       is_load = false;
2408       break;
2409    case nir_intrinsic_load_ssbo:
2410       src = &intr->src[0];
2411       var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2412       offset = intr->src[1].ssa;
2413       break;
2414    case nir_intrinsic_load_ubo:
2415       src = &intr->src[0];
2416       var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
2417       offset = intr->src[1].ssa;
2418       ssbo = false;
2419       break;
2420    default:
2421       return false;
2422    }
2423    assert(var);
2424    assert(offset);
2425    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
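        /* ubo binding 0 is the uniform_0 block, so real UBOs shift down by one
         * (and by first_ubo) to index the ubos array
         */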
2426    nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2427    if (!ssbo && bo->first_ubo && var->data.driver_location)
2428       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2429    else if (ssbo && bo->first_ssbo)
2430       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2431    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
2432    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2433    assert(intr->num_components <= 2);
2434    if (is_load) {
2435       nir_ssa_def *result[2];
2436       for (unsigned i = 0; i < intr->num_components; i++) {
2437          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2438          result[i] = nir_load_deref(b, deref_arr);
2439          if (intr->intrinsic == nir_intrinsic_load_ssbo)
2440             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2441          offset = nir_iadd_imm(b, offset, 1);
2442       }
2443       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
2444       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2445    } else {
2446       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2447       nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2448    }
2449    nir_instr_remove(instr);
2450    return true;
2451 }
2452
2453 static bool
2454 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2455 {
2456    struct bo_vars bo = get_bo_vars(zs, shader);
2457    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2458 }
2459
2460 static bool
2461 find_var_deref(nir_shader *nir, nir_variable *var)
2462 {
2463    nir_foreach_function(function, nir) {
2464       if (!function->impl)
2465          continue;
2466
2467       nir_foreach_block(block, function->impl) {
2468          nir_foreach_instr(instr, block) {
2469             if (instr->type != nir_instr_type_deref)
2470                continue;
2471             nir_deref_instr *deref = nir_instr_as_deref(instr);
2472             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2473                return true;
2474          }
2475       }
2476    }
2477    return false;
2478 }
2479
2480 struct clamp_layer_output_state {
2481    nir_variable *original;
2482    nir_variable *clamped;
2483 };
2484
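     /* in effect this emits: clamped = (framebuffer_is_layered == 1) ? layer : 0 */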
2485 static void
2486 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2487 {
2488    nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
2489                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
2490                                                     .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
2491    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2492    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2493    nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2494                                   nir_load_deref(b, original_deref),
2495                                   nir_imm_int(b, 0));
2496    nir_store_deref(b, clamped_deref, layer, 0);
2497 }
2498
2499 static bool
2500 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2501 {
2502    struct clamp_layer_output_state *state = data;
2503    switch (instr->type) {
2504    case nir_instr_type_intrinsic: {
2505       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2506       if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2507           intr->intrinsic != nir_intrinsic_emit_vertex)
2508          return false;
2509       b->cursor = nir_before_instr(instr);
2510       clamp_layer_output_emit(b, state);
2511       return true;
2512    }
2513    default: return false;
2514    }
2515 }
2516
2517 static bool
2518 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2519 {
2520    switch (vs->info.stage) {
2521    case MESA_SHADER_VERTEX:
2522    case MESA_SHADER_GEOMETRY:
2523    case MESA_SHADER_TESS_EVAL:
2524       break;
2525    default:
2526       unreachable("invalid last vertex stage!");
2527    }
2528    struct clamp_layer_output_state state = {0};
2529    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2530    if (!state.original || !find_var_deref(vs, state.original))
2531       return false;
2532    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2533    state.clamped->data.location = VARYING_SLOT_LAYER;
2534    nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2535    if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2536       state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2537       state.original->data.driver_location = (*next_location)++;
2538       if (fs_var) {
2539          fs_var->data.location = state.original->data.location;
2540          fs_var->data.driver_location = state.original->data.driver_location;
2541       }
2542    } else {
2543       if (state.original->data.explicit_xfb_buffer) {
2544          /* Will xfb the clamped output but still better than nothing */
2545          state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2546          state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2547          state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2548          state.clamped->data.offset = state.original->data.offset;
2549          state.clamped->data.stream = state.original->data.stream;
2550       }
2551       state.original->data.mode = nir_var_shader_temp;
2552       nir_fixup_deref_modes(vs);
2553    }
2554    if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2555       nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2556    } else {
2557       nir_builder b;
2558       nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2559       nir_builder_init(&b, impl);
2560       assert(impl->end_block->predecessors->entries == 1);
2561       b.cursor = nir_after_cf_list(&impl->body);
2562       clamp_layer_output_emit(&b, &state);
2563       nir_metadata_preserve(impl, nir_metadata_dominance);
2564    }
2565    optimize_nir(vs, NULL);
2566    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2567    return true;
2568 }
2569
2570 static void
2571 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2572 {
2573    unsigned slot = var->data.location;
2574    switch (slot) {
2575    case -1:
2576    case VARYING_SLOT_POS:
2577    case VARYING_SLOT_PNTC:
2578    case VARYING_SLOT_PSIZ:
2579    case VARYING_SLOT_LAYER:
2580    case VARYING_SLOT_PRIMITIVE_ID:
2581    case VARYING_SLOT_CLIP_DIST0:
2582    case VARYING_SLOT_CULL_DIST0:
2583    case VARYING_SLOT_VIEWPORT:
2584    case VARYING_SLOT_FACE:
2585    case VARYING_SLOT_TESS_LEVEL_OUTER:
2586    case VARYING_SLOT_TESS_LEVEL_INNER:
2587       /* use a sentinel value to avoid counting later */
2588       var->data.driver_location = UINT_MAX;
2589       break;
2590
2591    default:
2592       if (var->data.patch) {
2593          assert(slot >= VARYING_SLOT_PATCH0);
2594          slot -= VARYING_SLOT_PATCH0;
2595       }
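           /* the first variable seen for a generic slot reserves consecutive
            * driver locations for every vec4 slot it covers,
            * e.g. a mat4 varying claims 4 in a row
            */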
2596       if (slot_map[slot] == 0xff) {
2597          assert(*reserved < MAX_VARYING);
2598          unsigned num_slots;
2599          if (nir_is_arrayed_io(var, stage))
2600             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2601          else
2602             num_slots = glsl_count_vec4_slots(var->type, false, false);
2603          assert(*reserved + num_slots <= MAX_VARYING);
2604          for (unsigned i = 0; i < num_slots; i++)
2605             slot_map[slot + i] = (*reserved)++;
2606       }
2607       slot = slot_map[slot];
2608       assert(slot < MAX_VARYING);
2609       var->data.driver_location = slot;
2610    }
2611 }
2612
2613 ALWAYS_INLINE static bool
2614 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2615 {
2616    if (stage != MESA_SHADER_FRAGMENT)
2617       return false;
2618    return var->data.location >= VARYING_SLOT_TEX0 && 
2619           var->data.location <= VARYING_SLOT_TEX7;
2620 }
2621
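/* map the consumer side of the interface to the driver location recorded in
 * slot_map; returns false for dead io (a slot the producer never assigned),
 * except that fs texcoords are kept for GL_COORD_REPLACE handling and tcs
 * variables always get a slot
 */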
2622 static bool
2623 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2624 {
2625    unsigned slot = var->data.location;
2626    switch (slot) {
2627    case VARYING_SLOT_POS:
2628    case VARYING_SLOT_PNTC:
2629    case VARYING_SLOT_PSIZ:
2630    case VARYING_SLOT_LAYER:
2631    case VARYING_SLOT_PRIMITIVE_ID:
2632    case VARYING_SLOT_CLIP_DIST0:
2633    case VARYING_SLOT_CULL_DIST0:
2634    case VARYING_SLOT_VIEWPORT:
2635    case VARYING_SLOT_FACE:
2636    case VARYING_SLOT_TESS_LEVEL_OUTER:
2637    case VARYING_SLOT_TESS_LEVEL_INNER:
2638       /* use a sentinel value to avoid counting later */
2639       var->data.driver_location = UINT_MAX;
2640       break;
2641    default:
2642       if (var->data.patch) {
2643          assert(slot >= VARYING_SLOT_PATCH0);
2644          slot -= VARYING_SLOT_PATCH0;
2645       }
2646       if (slot_map[slot] == (unsigned char)-1) {
2647          /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2648           * so keep for now and eliminate later
2649           */
2650          if (is_texcoord(stage, var)) {
2651             var->data.driver_location = -1;
2652             return true;
2653          }
2654          if (stage != MESA_SHADER_TESS_CTRL)
2655             /* dead io */
2656             return false;
2657          /* patch variables may be read in the workgroup */
2658          slot_map[slot] = (*reserved)++;
2659       }
2660       var->data.driver_location = slot_map[slot];
2661    }
2662    return true;
2663 }
2664
2665
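/* instruction pass: rewrite loads of the given (dead) variable as zero,
 * or as (0,0,0,1) for fs color inputs to match the GL default color
 */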
2666 static bool
2667 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2668 {
2669    nir_variable *var = data;
2670    if (instr->type != nir_instr_type_intrinsic)
2671       return false;
2672
2673    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2674    if (intr->intrinsic != nir_intrinsic_load_deref)
2675       return false;
2676    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
2677    if (deref_var != var)
2678       return false;
2679    b->cursor = nir_before_instr(instr);
2680    nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
2681    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2682       switch (var->data.location) {
2683       case VARYING_SLOT_COL0:
2684       case VARYING_SLOT_COL1:
2685       case VARYING_SLOT_BFC0:
2686       case VARYING_SLOT_BFC1:
2687          /* default color is 0,0,0,1 */
2688          if (nir_dest_num_components(intr->dest) == 4)
2689             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2690          break;
2691       default:
2692          break;
2693       }
2694    }
2695    nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
2696    nir_instr_remove(instr);
2697    return true;
2698 }
2699
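/* assign matching driver locations to a producer/consumer shader pair:
 * producer outputs are packed first and consumer inputs are then mapped to
 * the same slots (inverted for tcs->tes, which assigns from the tes side);
 * injected pointsize is stripped, dead inputs are rewritten as zero loads,
 * and gl_Layer is clamped when the driver workaround requires it
 */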
2700 void
2701 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2702 {
2703    unsigned reserved = 0;
2704    unsigned char slot_map[VARYING_SLOT_MAX];
2705    memset(slot_map, -1, sizeof(slot_map));
2706    bool do_fixup = false;
2707    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2708    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2709       /* remove injected pointsize from all but the last vertex stage */
2710       nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2711       if (var && !var->data.explicit_location) {
2712          var->data.mode = nir_var_shader_temp;
2713          nir_fixup_deref_modes(producer);
2714          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2715          optimize_nir(producer, NULL);
2716       }
2717    }
2718    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2719       /* never assign from tcs -> tes, always invert */
2720       nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
2721          assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
2722       nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
2723          if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
2724             /* this is an output, nothing more needs to be done for it to be dropped */
2725             do_fixup = true;
2726       }
2727    } else {
2728       nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
2729          assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
2730       nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
2731          if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
2732             do_fixup = true;
2733             /* input needs to be rewritten */
2734             nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
2735          }
2736       }
2737       if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2738          do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2739    }
2740    if (!do_fixup)
2741       return;
2742    nir_fixup_deref_modes(nir);
2743    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2744    optimize_nir(nir, NULL);
2745 }
2746
2747 /* all types that hit this function contain something that is 64bit */
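/* roughly, with the vec4-aligned packing below:
 *   double -> vec2          dvec2 -> vec4
 *   dvec3  -> struct { vec4; vec2; }
 *   dvec4  -> struct { vec4; vec4; }
 *   dmat3  -> struct of six vec4s (columns padded to vec4)
 * (u)int64 types use (u)int base types instead of float
 */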
2748 static const struct glsl_type *
2749 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2750 {
2751    if (glsl_type_is_array(type)) {
2752       const struct glsl_type *child = glsl_get_array_element(type);
2753       unsigned elements = glsl_array_size(type);
2754       unsigned stride = glsl_get_explicit_stride(type);
2755       return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2756    }
2757    /* rewrite structs recursively */
2758    if (glsl_type_is_struct_or_ifc(type)) {
2759       unsigned nmembers = glsl_get_length(type);
2760       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2761       unsigned xfb_offset = 0;
2762       for (unsigned i = 0; i < nmembers; i++) {
2763          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2764          fields[i] = *f;
2765          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2766          if (i < nmembers - 1 && xfb_offset % 8 &&
2767              (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2768               (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2769             var->data.is_xfb = true;
2770          }
2771          fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2772       }
2773       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2774    }
2775    if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2776       return type;
2777    if (doubles_only && glsl_type_is_vector_or_scalar(type))
2778       return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2779    enum glsl_base_type base_type;
2780    switch (glsl_get_base_type(type)) {
2781    case GLSL_TYPE_UINT64:
2782       base_type = GLSL_TYPE_UINT;
2783       break;
2784    case GLSL_TYPE_INT64:
2785       base_type = GLSL_TYPE_INT;
2786       break;
2787    case GLSL_TYPE_DOUBLE:
2788       base_type = GLSL_TYPE_FLOAT;
2789       break;
2790    default:
2791       unreachable("unknown 64-bit vertex attribute format!");
2792    }
2793    if (glsl_type_is_scalar(type))
2794       return glsl_vector_type(base_type, 2);
2795    unsigned num_components;
2796    if (glsl_type_is_matrix(type)) {
2797       /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
2798       unsigned vec_components = glsl_get_vector_elements(type);
2799       if (vec_components == 3)
2800          vec_components = 4;
2801       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2802    } else {
2803       num_components = glsl_get_vector_elements(type) * 2;
2804       if (num_components <= 4)
2805          return glsl_vector_type(base_type, num_components);
2806    }
2807    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2808    struct glsl_struct_field fields[8] = {0};
2809    unsigned remaining = num_components;
2810    unsigned nfields = 0;
2811    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2812       assert(i < ARRAY_SIZE(fields));
2813       fields[i].name = "";
2814       fields[i].offset = i * 16;
2815       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2816    }
2817    char buf[64];
2818    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2819    return glsl_struct_type(fields, nfields, buf, true);
2820 }
2821
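/* despite the name, this returns the matrix type if the deref or any of its
 * parents dereferences a matrix, and NULL otherwise
 */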
2822 static const struct glsl_type *
2823 deref_is_matrix(nir_deref_instr *deref)
2824 {
2825    if (glsl_type_is_matrix(deref->type))
2826       return deref->type;
2827    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2828    if (parent)
2829       return deref_is_matrix(parent);
2830    return NULL;
2831 }
2832
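/* rewrite every deref, load, and store of a 64bit variable whose type has
 * already been rewritten to 32bit: stores unpack each 64bit component into a
 * pair of 32bit values, loads pack them back together, and (possibly
 * indirect) matrix row accesses are expanded into per-column if blocks
 */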
2833 static bool
2834 lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
2835                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
2836 {
2837    bool func_progress = false;
2838    if (!function->impl)
2839       return false;
2840    nir_builder b;
2841    nir_builder_init(&b, function->impl);
2842    nir_foreach_block(block, function->impl) {
2843       nir_foreach_instr_safe(instr, block) {
2844          switch (instr->type) {
2845          case nir_instr_type_deref: {
2846             nir_deref_instr *deref = nir_instr_as_deref(instr);
2847             if (!(deref->modes & var->data.mode))
2848                continue;
2849             if (nir_deref_instr_get_variable(deref) != var)
2850                continue;
2851
2852             /* matrix types are special: store the original deref type for later use */
2853             const struct glsl_type *matrix = deref_is_matrix(deref);
2854             nir_deref_instr *parent = nir_deref_instr_parent(deref);
2855             if (!matrix) {
2856                /* if this isn't a direct matrix deref, it may be a matrix row deref */
2857                hash_table_foreach(derefs, he) {
2858                   /* propagate parent matrix type to row deref */
2859                   if (he->key == parent)
2860                      matrix = he->data;
2861                }
2862             }
2863             if (matrix)
2864                _mesa_hash_table_insert(derefs, deref, (void*)matrix);
2865             if (deref->deref_type == nir_deref_type_var)
2866                deref->type = var->type;
2867             else
2868                deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
2869          }
2870          break;
2871          case nir_instr_type_intrinsic: {
2872             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2873             if (intr->intrinsic != nir_intrinsic_store_deref &&
2874                   intr->intrinsic != nir_intrinsic_load_deref)
2875                break;
2876             if (nir_intrinsic_get_var(intr, 0) != var)
2877                break;
2878             if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2879                   (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
2880                break;
2881             b.cursor = nir_before_instr(instr);
2882             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2883             unsigned num_components = intr->num_components * 2;
2884             nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
2885             /* this is the stored matrix type from the deref */
2886             struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2887             const struct glsl_type *matrix = he ? he->data : NULL;
2888             if (doubles_only && !matrix)
2889                break;
2890             func_progress = true;
2891             if (intr->intrinsic == nir_intrinsic_store_deref) {
2892                /* first, unpack the src data to 32bit vec2 components */
2893                for (unsigned i = 0; i < intr->num_components; i++) {
2894                   nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2895                   comp[i * 2] = nir_channel(&b, ssa, 0);
2896                   comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2897                }
2898                unsigned wrmask = nir_intrinsic_write_mask(intr);
2899                unsigned mask = 0;
2900                /* expand writemask for doubled components */
2901                for (unsigned i = 0; i < intr->num_components; i++) {
2902                   if (wrmask & BITFIELD_BIT(i))
2903                      mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2904                }
2905                if (matrix) {
2906                   /* matrix types always come from array (row) derefs */
2907                   assert(deref->deref_type == nir_deref_type_array);
2908                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2909                   /* let optimization clean up consts later */
2910                   nir_ssa_def *index = deref->arr.index.ssa;
2911                   /* this might be an indirect array index:
2912                    * - iterate over matrix columns
2913                    * - add if blocks for each column
2914                    * - perform the store in the block
2915                    */
2916                   for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2917                      nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2918                      unsigned vec_components = glsl_get_vector_elements(matrix);
2919                      /* always clamp dvec3 to 4 components */
2920                      if (vec_components == 3)
2921                         vec_components = 4;
2922                      unsigned start_component = idx * vec_components * 2;
2923                      /* struct member */
2924                      unsigned member = start_component / 4;
2925                      /* number of components remaining */
2926                      unsigned remaining = num_components;
2927                      for (unsigned i = 0; i < num_components; member++) {
2928                         if (!(mask & BITFIELD_BIT(i)))
2929                            continue;
2930                         assert(member < glsl_get_length(var_deref->type));
2931                         /* deref the rewritten struct to the appropriate vec4/vec2 */
2932                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2933                         unsigned incr = MIN2(remaining, 4);
2934                         /* assemble the write component vec */
2935                         nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
2936                         /* use the number of components being written as the writemask */
2937                         if (glsl_get_vector_elements(strct->type) > val->num_components)
2938                            val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
2939                         nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
2940                         remaining -= incr;
2941                         i += incr;
2942                      }
2943                      nir_pop_if(&b, NULL);
2944                   }
2945                   _mesa_set_add(deletes, &deref->instr);
2946                } else if (num_components <= 4) {
2947                   /* simple store case: just write out the components */
2948                   nir_ssa_def *dest = nir_vec(&b, comp, num_components);
2949                   nir_store_deref(&b, deref, dest, mask);
2950                } else {
2951                   /* writing > 4 components: access the struct and write to the appropriate vec4 members */
2952                   for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
2953                      if (!(mask & BITFIELD_MASK(4)))
2954                         continue;
2955                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2956                      nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
2957                      if (glsl_get_vector_elements(strct->type) > dest->num_components)
2958                         dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
2959                      nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
2960                      mask >>= 4;
2961                   }
2962                }
2963             } else {
2964                nir_ssa_def *dest = NULL;
2965                if (matrix) {
2966                   /* matrix types always come from array (row) derefs */
2967                   assert(deref->deref_type == nir_deref_type_array);
2968                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2969                   /* let optimization clean up consts later */
2970                   nir_ssa_def *index = deref->arr.index.ssa;
2971                   /* this might be an indirect array index:
2972                    * - iterate over matrix columns
2973                    * - add if blocks for each column
2974                    * - phi the loads using the array index
2975                    */
2976                   unsigned cols = glsl_get_matrix_columns(matrix);
2977                   nir_ssa_def *dests[4];
2978                   for (unsigned idx = 0; idx < cols; idx++) {
2979                      /* don't add an if for the final row: this will be handled in the else */
2980                      if (idx < cols - 1)
2981                         nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2982                      unsigned vec_components = glsl_get_vector_elements(matrix);
2983                      /* always clamp dvec3 to 4 components */
2984                      if (vec_components == 3)
2985                         vec_components = 4;
2986                      unsigned start_component = idx * vec_components * 2;
2987                      /* struct member */
2988                      unsigned member = start_component / 4;
2989                      /* number of components remaining */
2990                      unsigned remaining = num_components;
2991                      /* component index */
2992                      unsigned comp_idx = 0;
2993                      for (unsigned i = 0; i < num_components; member++) {
2994                         assert(member < glsl_get_length(var_deref->type));
2995                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2996                         nir_ssa_def *load = nir_load_deref(&b, strct);
2997                         unsigned incr = MIN2(remaining, 4);
2998                         /* repack the loads to 64bit */
2999                         for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3000                            comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3001                         remaining -= incr;
3002                         i += incr;
3003                      }
3004                      dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3005                      if (idx < cols - 1)
3006                         nir_push_else(&b, NULL);
3007                   }
3008                   /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3009                   for (unsigned idx = cols - 1; idx >= 1; idx--) {
3010                      nir_pop_if(&b, NULL);
3011                      dest = nir_if_phi(&b, dests[idx - 1], dest);
3012                   }
3013                   _mesa_set_add(deletes, &deref->instr);
3014                } else if (num_components <= 4) {
3015                   /* simple load case */
3016                   nir_ssa_def *load = nir_load_deref(&b, deref);
3017                   /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3018                   for (unsigned i = 0; i < intr->num_components; i++) {
3019                      comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3020                   }
3021                   dest = nir_vec(&b, comp, intr->num_components);
3022                } else {
3023                   /* loading > 4 components: access the struct and load the appropriate vec4 members */
3024                   for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3025                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3026                      nir_ssa_def *load = nir_load_deref(&b, strct);
3027                      comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
3028                      if (num_components > 2)
3029                         comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3030                   }
3031                   dest = nir_vec(&b, comp, intr->num_components);
3032                }
3033                nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
3034             }
3035             _mesa_set_add(deletes, instr);
3036             break;
3037          }
3038          break;
3039          default: break;
3040          }
3041       }
3042    }
3043    if (func_progress)
3044       nir_metadata_preserve(function->impl, nir_metadata_none);
3045    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3046    set_foreach_remove(deletes, he)
3047       nir_instr_remove((void*)he->key);
3048    return func_progress;
3049 }
3050
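/* rewrite a single variable's type, then all of its accesses in every function */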
3051 static bool
3052 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3053                       struct set *deletes, bool doubles_only)
3054 {
3055    if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3056       return false;
3057    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3058    /* once type is rewritten, rewrite all loads and stores */
3059    nir_foreach_function(function, shader)
3060       lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3061    return true;
3062 }
3063
3064 /* rewrite all input/output variables using 32bit types and load/stores */
3065 static bool
3066 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3067 {
3068    bool progress = false;
3069    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3070    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3071    nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
3072       progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
3073    nir_foreach_function(function, shader) {
3074       nir_foreach_function_temp_variable(var, function->impl) {
3075          if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3076             continue;
3077          var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3078          progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3079       }
3080    }
3081    ralloc_free(deletes);
3082    ralloc_free(derefs);
3083    if (progress) {
3084       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3085       nir_lower_phis_to_scalar(shader, false);
3086       optimize_nir(shader, NULL);
3087    }
3088    return progress;
3089 }
3090
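/* split struct-type shader outputs into one variable per struct member so
 * each member can get its own location
 */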
3091 static bool
3092 split_blocks(nir_shader *nir)
3093 {
3094    bool progress = false;
3095    bool changed = false;
3096    do {
3097       progress = false;
3098       nir_foreach_shader_out_variable(var, nir) {
3099          const struct glsl_type *base_type = glsl_without_array(var->type);
3100          nir_variable *members[32]; //can't have more than this without breaking NIR
3101          if (!glsl_type_is_struct(base_type))
3102             continue;
3103          /* TODO: arrays? */
3104          if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
3105             continue;
3106          if (glsl_count_attribute_slots(var->type, false) == 1)
3107             continue;
3108          unsigned offset = 0;
3109          for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
3110             members[i] = nir_variable_clone(var, nir);
3111             members[i]->type = glsl_get_struct_field(var->type, i);
3112             members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
3113             members[i]->data.location += offset;
3114             offset += glsl_count_attribute_slots(members[i]->type, false);
3115             nir_shader_add_variable(nir, members[i]);
3116          }
3117          nir_foreach_function(function, nir) {
3118             bool func_progress = false;
3119             if (!function->impl)
3120                continue;
3121             nir_builder b;
3122             nir_builder_init(&b, function->impl);
3123             nir_foreach_block(block, function->impl) {
3124                nir_foreach_instr_safe(instr, block) {
3125                   switch (instr->type) {
3126                   case nir_instr_type_deref: {
3127                      nir_deref_instr *deref = nir_instr_as_deref(instr);
3128                      if (!(deref->modes & nir_var_shader_out))
3129                         continue;
3130                      if (nir_deref_instr_get_variable(deref) != var)
3131                         continue;
3132                      if (deref->deref_type != nir_deref_type_struct)
3133                         continue;
3134                      nir_deref_instr *parent = nir_deref_instr_parent(deref);
3135                      if (parent->deref_type != nir_deref_type_var)
3136                         continue;
3137                      deref->modes = nir_var_shader_temp;
3138                      parent->modes = nir_var_shader_temp;
3139                      b.cursor = nir_before_instr(instr);
3140                      nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
3141                      nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
3142                      nir_instr_remove(&deref->instr);
3143                      func_progress = true;
3144                      break;
3145                   }
3146                   default: break;
3147                   }
3148                }
3149             }
3150             if (func_progress)
3151                nir_metadata_preserve(function->impl, nir_metadata_none);
3152          }
3153          var->data.mode = nir_var_shader_temp;
3154          changed = true;
3155          progress = true;
3156       }
3157    } while (progress);
3158    return changed;
3159 }
3160
3161 static void
3162 zink_shader_dump(void *words, size_t size, const char *file)
3163 {
3164    FILE *fp = fopen(file, "wb");
3165    if (fp) {
3166       fwrite(words, 1, size, fp);
3167       fclose(fp);
3168       fprintf(stderr, "wrote '%s'...\n", file);
3169    }
3170 }
3171
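/* create the Vulkan object for compiled SPIR-V: a VkShaderModule, or a
 * VkShaderEXT when compiling separate shaders with EXT_shader_object; in
 * debug builds with ZINK_DEBUG=validation the SPIR-V is also round-tripped
 * through spirv_to_nir as a sanity check
 */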
3172 static struct zink_shader_object
3173 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool separate)
3174 {
3175    VkShaderModuleCreateInfo smci = {0};
3176    VkShaderCreateInfoEXT sci = {0};
3177
3178    if (!spirv)
3179       spirv = zs->spirv;
3180
3181    if (zink_debug & ZINK_DEBUG_SPIRV) {
3182       char buf[256];
3183       static int i;
3184       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3185       zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3186    }
3187
3188    sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3189    sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3190    if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
3191       sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT;
3192    sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3193    sci.codeSize = spirv->num_words * sizeof(uint32_t);
3194    sci.pCode = spirv->words;
3195    sci.pName = "main";
3196    sci.setLayoutCount = 2;
3197    VkDescriptorSetLayout dsl[2] = {0};
3198    dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl;
3199    sci.pSetLayouts = dsl;
3200    VkPushConstantRange pcr;
3201    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3202    pcr.offset = 0;
3203    pcr.size = sizeof(struct zink_gfx_push_constant);
3204    sci.pushConstantRangeCount = 1;
3205    sci.pPushConstantRanges = &pcr;
3206
3207    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3208    smci.codeSize = spirv->num_words * sizeof(uint32_t);
3209    smci.pCode = spirv->words;
3210
3211 #ifndef NDEBUG
3212    if (zink_debug & ZINK_DEBUG_VALIDATION) {
3213       static const struct spirv_to_nir_options spirv_options = {
3214          .environment = NIR_SPIRV_VULKAN,
3215          .caps = {
3216             .float64 = true,
3217             .int16 = true,
3218             .int64 = true,
3219             .tessellation = true,
3220             .float_controls = true,
3221             .image_ms_array = true,
3222             .image_read_without_format = true,
3223             .image_write_without_format = true,
3224             .storage_image_ms = true,
3225             .geometry_streams = true,
3226             .storage_8bit = true,
3227             .storage_16bit = true,
3228             .variable_pointers = true,
3229             .stencil_export = true,
3230             .post_depth_coverage = true,
3231             .transform_feedback = true,
3232             .device_group = true,
3233             .draw_parameters = true,
3234             .shader_viewport_index_layer = true,
3235             .multiview = true,
3236             .physical_storage_buffer_address = true,
3237             .int64_atomics = true,
3238             .subgroup_arithmetic = true,
3239             .subgroup_basic = true,
3240             .subgroup_ballot = true,
3241             .subgroup_quad = true,
3242             .subgroup_shuffle = true,
3243             .subgroup_vote = true,
3244             .vk_memory_model = true,
3245             .vk_memory_model_device_scope = true,
3246             .int8 = true,
3247             .float16 = true,
3248             .demote_to_helper_invocation = true,
3249             .sparse_residency = true,
3250             .min_lod = true,
3251          },
3252          .ubo_addr_format = nir_address_format_32bit_index_offset,
3253          .ssbo_addr_format = nir_address_format_32bit_index_offset,
3254          .phys_ssbo_addr_format = nir_address_format_64bit_global,
3255          .push_const_addr_format = nir_address_format_logical,
3256          .shared_addr_format = nir_address_format_32bit_offset,
3257       };
3258       uint32_t num_spec_entries = 0;
3259       struct nir_spirv_specialization *spec_entries = NULL;
3260       VkSpecializationInfo sinfo = {0};
3261       VkSpecializationMapEntry me[3];
3262       uint32_t size[3] = {1,1,1};
3263       if (!zs->info.workgroup_size[0]) {
3264          sinfo.mapEntryCount = 3;
3265          sinfo.pMapEntries = &me[0];
3266          sinfo.dataSize = sizeof(uint32_t) * 3;
3267          sinfo.pData = size;
3268          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3269          for (int i = 0; i < 3; i++) {
3270             me[i].size = sizeof(uint32_t);
3271             me[i].constantID = ids[i];
3272             me[i].offset = i * sizeof(uint32_t);
3273          }
3274          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3275       }
3276       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3277                          spec_entries, num_spec_entries,
3278                          clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3279       assert(nir);
3280       ralloc_free(nir);
3281       free(spec_entries);
3282    }
3283 #endif
3284
3285    VkResult ret;
3286    struct zink_shader_object obj;
3287    if (!separate || !screen->info.have_EXT_shader_object)
3288       ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3289    else
3290       ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3291    bool success = zink_screen_handle_vkresult(screen, ret);
3292    assert(success);
3293    return obj;
3294 }
3295
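/* delete any input/output variables that are never actually dereferenced */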
3296 static void
3297 prune_io(nir_shader *nir)
3298 {
3299    nir_foreach_shader_in_variable_safe(var, nir) {
3300       if (!find_var_deref(nir, var))
3301          var->data.mode = nir_var_shader_temp;
3302    }
3303    nir_foreach_shader_out_variable_safe(var, nir) {
3304       if (!find_var_deref(nir, var))
3305          var->data.mode = nir_var_shader_temp;
3306    }
3307    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3308 }
3309
3310 static void
3311 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3312 {
3313    /* unconvert from zink_binding() */
3314    uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3315    assert(sampler_id < 32); //bitfield size for tracking
3316    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
3317 }
3318
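/* rewrite the dest of a tex instruction to the sampler's return type and
 * convert back to the original bit size for existing users; legacy-style
 * shadow samplers in fs are only flagged here (flag_shadow_tex) so a keyed
 * recompile can handle them later
 */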
3319 static nir_ssa_def *
3320 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
3321 {
3322    assert(var);
3323    const struct glsl_type *type = glsl_without_array(var->type);
3324    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3325    bool is_int = glsl_base_type_is_integer(ret_type);
3326    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3327    unsigned dest_size = nir_dest_bit_size(tex->dest);
3328    b->cursor = nir_after_instr(&tex->instr);
3329    unsigned num_components = nir_dest_num_components(tex->dest);
3330    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3331    if (bit_size == dest_size && !rewrite_depth)
3332       return NULL;
3333    nir_ssa_def *dest = &tex->dest.ssa;
3334    if (rewrite_depth && data) {
3335       if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3336          flag_shadow_tex(var, data);
3337       else
3338          mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3339       return NULL;
3340    }
3341    if (bit_size != dest_size) {
3342       tex->dest.ssa.bit_size = bit_size;
3343       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3344
3345       if (is_int) {
3346          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3347             dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
3348          else
3349             dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
3350       } else {
3351          dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
3352       }
3353       if (rewrite_depth)
3354          return dest;
3355       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
3356    } else if (rewrite_depth) {
3357       return dest;
3358    }
3359    return dest;
3360 }
3361
3362 struct lower_zs_swizzle_state {
3363    bool shadow_only;
3364    unsigned base_sampler_id;
3365    const struct zink_zs_swizzle_key *swizzle;
3366 };
3367
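/* instruction pass backing lower_zs_swizzle_tex: rewrites depth/stencil
 * sampler results to the sampler's return type and manually applies the
 * swizzle from the zs swizzle key (PIPE_SWIZZLE_0/1 become immediates);
 * plain shadow results are splatted across all dest components
 */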
3368 static bool
3369 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3370 {
3371    struct lower_zs_swizzle_state *state = data;
3372    const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3373    assert(state->shadow_only || swizzle_key);
3374    if (instr->type != nir_instr_type_tex)
3375       return false;
3376    nir_tex_instr *tex = nir_instr_as_tex(instr);
3377    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3378        (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3379       return false;
3380    if (tex->is_shadow && tex->op == nir_texop_tg4)
3381       /* Will not even try to emulate the shadow comparison */
3382       return false;
3383    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3384    nir_variable *var = NULL;
3385    if (handle != -1)
3386       /* gtfo bindless depth texture mode */
3387       return false;
3388    nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3389       if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3390          unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3391          if (tex->texture_index >= img->data.driver_location &&
3392                tex->texture_index < img->data.driver_location + size) {
3393             var = img;
3394             break;
3395          }
3396       }
3397    }
3398    assert(var);
3399    uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3400    const struct glsl_type *type = glsl_without_array(var->type);
3401    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3402    bool is_int = glsl_base_type_is_integer(ret_type);
3403    unsigned num_components = nir_dest_num_components(tex->dest);
3404    if (tex->is_shadow)
3405       tex->is_new_style_shadow = true;
3406    nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3407    assert(dest || !state->shadow_only);
3408    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3409       return false;
3410    else if (!dest)
3411       dest = &tex->dest.ssa;
3412    else
3413       tex->dest.ssa.num_components = 1;
3414    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3415       /* these require manual swizzles */
3416       if (tex->op == nir_texop_tg4) {
3417          assert(!tex->is_shadow);
3418          nir_ssa_def *swizzle;
3419          switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3420          case PIPE_SWIZZLE_0:
3421             swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
3422             break;
3423          case PIPE_SWIZZLE_1:
3424             if (is_int)
3425                swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
3426             else
3427                swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
3428             break;
3429          default:
3430             if (!tex->component)
3431                return false;
3432             tex->component = 0;
3433             return true;
3434          }
3435          nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3436          return true;
3437       }
3438       nir_ssa_def *vec[4];
3439       for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3440          switch (swizzle_key->swizzle[sampler_id].s[i]) {
3441          case PIPE_SWIZZLE_0:
3442             vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
3443             break;
3444          case PIPE_SWIZZLE_1:
3445             if (is_int)
3446                vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
3447             else
3448                vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
3449             break;
3450          default:
3451             vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3452             break;
3453          }
3454       }
3455       nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
3456       nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3457    } else {
3458       assert(tex->is_shadow);
3459       nir_ssa_def *vec[4] = {dest, dest, dest, dest};
3460       nir_ssa_def *splat = nir_vec(b, vec, num_components);
3461       nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3462    }
3463    return true;
3464 }
3465
3466 static bool
3467 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3468 {
3469    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3470    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3471    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
3472 }
3473
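/* flip the y component of gl_PointCoord reads (y' = 1 - y) to implement the
 * point_coord_yinvert fs key bit
 */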
3474 static bool
3475 invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
3476 {
3477    if (instr->type != nir_instr_type_intrinsic)
3478       return false;
3479    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3480    if (intr->intrinsic != nir_intrinsic_load_deref)
3481       return false;
3482    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
3483    if (deref_var->data.location != VARYING_SLOT_PNTC)
3484       return false;
3485    b->cursor = nir_after_instr(instr);
3486    nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
3487                                   nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
3488    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3489    return true;
3490 }
3491
3492 static bool
3493 invert_point_coord(nir_shader *nir)
3494 {
3495    if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
3496       return false;
3497    return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
3498 }
3499
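/* common final compile step: prune unused io, convert out of SSA, translate
 * to SPIR-V with nir_to_spirv, and create the Vulkan shader object
 */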
3500 static struct zink_shader_object
3501 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool separate)
3502 {
3503    struct zink_shader_info *sinfo = &zs->sinfo;
3504    prune_io(nir);
3505
3506    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3507
3508    struct zink_shader_object obj;
3509    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3510    if (spirv)
3511       obj = zink_shader_spirv_compile(screen, zs, spirv, separate);
3512
3513    /* TODO: determine if there's any reason to cache spirv output? */
3514    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3515       zs->spirv = spirv;
3516    else
3517       ralloc_free(spirv);
3518    return obj;
3519 }
3520
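/* compile a shader variant: apply every lowering required by the shader key
 * (decomposed attribs, line/point emulation, depth clip, robustness, sample
 * mask and fbfetch fixups, ...), re-optimize if anything changed, then
 * compile the module
 */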
3521 VkShaderModule
3522 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
3523                     nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
3524 {
3525    struct zink_shader_info *sinfo = &zs->sinfo;
3526    bool need_optimize = false;
3527    bool inlined_uniforms = false;
3528
3529    if (key) {
3530       if (key->inline_uniforms) {
3531          NIR_PASS_V(nir, nir_inline_uniforms,
3532                     nir->info.num_inlinable_uniforms,
3533                     key->base.inlined_uniform_values,
3534                     nir->info.inlinable_uniform_dw_offsets);
3535
3536          inlined_uniforms = true;
3537       }
3538
3539       /* TODO: use a separate mem ctx here for ralloc */
3540
3541       if (!screen->optimal_keys) {
3542          switch (zs->info.stage) {
3543          case MESA_SHADER_VERTEX: {
3544             uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3545             const struct zink_vs_key *vs_key = zink_vs_key(key);
3546             switch (vs_key->size) {
3547             case 4:
3548                decomposed_attrs = vs_key->u32.decomposed_attrs;
3549                decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3550                break;
3551             case 2:
3552                decomposed_attrs = vs_key->u16.decomposed_attrs;
3553                decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3554                break;
3555             case 1:
3556                decomposed_attrs = vs_key->u8.decomposed_attrs;
3557                decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3558                break;
3559             default: break;
3560             }
3561             if (decomposed_attrs || decomposed_attrs_without_w)
3562                NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3563             break;
3564          }
3565
3566          case MESA_SHADER_GEOMETRY:
3567             if (zink_gs_key(key)->lower_line_stipple) {
3568                NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3569                NIR_PASS_V(nir, nir_lower_var_copies);
3570                need_optimize = true;
3571             }
3572
3573             if (zink_gs_key(key)->lower_line_smooth) {
3574                NIR_PASS_V(nir, lower_line_smooth_gs);
3575                NIR_PASS_V(nir, nir_lower_var_copies);
3576                need_optimize = true;
3577             }
3578
3579             if (zink_gs_key(key)->lower_gl_point) {
3580                NIR_PASS_V(nir, lower_gl_point_gs);
3581                need_optimize = true;
3582             }
3583
3584             if (zink_gs_key(key)->lower_pv_mode) {
3585                NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3586                need_optimize = true; //TODO verify that this is required
3587             }
3588             break;
3589
3590          default:
3591             break;
3592          }
3593       }
3594
3595       switch (zs->info.stage) {
3596       case MESA_SHADER_VERTEX:
3597       case MESA_SHADER_TESS_EVAL:
3598       case MESA_SHADER_GEOMETRY:
3599          if (zink_vs_key_base(key)->last_vertex_stage) {
3600             if (zs->sinfo.have_xfb)
3601                sinfo->last_vertex = true;
3602
3603             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3604                NIR_PASS_V(nir, nir_lower_clip_halfz);
3605             }
3606             if (zink_vs_key_base(key)->push_drawid) {
3607                NIR_PASS_V(nir, lower_drawid);
3608             }
3609          }
3610          if (zink_vs_key_base(key)->robust_access)
3611             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3612          break;
3613       case MESA_SHADER_FRAGMENT:
3614          if (zink_fs_key(key)->lower_line_smooth) {
3615             NIR_PASS_V(nir, lower_line_smooth_fs,
3616                        zink_fs_key(key)->lower_line_stipple);
3617             need_optimize = true;
3618          } else if (zink_fs_key(key)->lower_line_stipple)
3619             NIR_PASS_V(nir, lower_line_stipple_fs);
3620
3621          if (zink_fs_key(key)->lower_point_smooth) {
3622             NIR_PASS_V(nir, nir_lower_point_smooth);
3623             NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3624             nir->info.fs.uses_discard = true;
3625             need_optimize = true;
3626          }
3627
3628          if (zink_fs_key(key)->robust_access)
3629             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3630
3631          if (!zink_fs_key_base(key)->samples &&
3632             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3633             /* VK will always use gl_SampleMask[] values even if sample count is 0,
3634              * so we need to skip this write here to mimic GL's behavior of ignoring it
3635              */
3636             nir_foreach_shader_out_variable(var, nir) {
3637                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3638                   var->data.mode = nir_var_shader_temp;
3639             }
3640             nir_fixup_deref_modes(nir);
3641             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3642             need_optimize = true;
3643          }
3644          if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3645             NIR_PASS_V(nir, lower_dual_blend);
3646          }
3647          if (zink_fs_key_base(key)->single_sample) {
3648             NIR_PASS_V(nir, nir_lower_single_sampled);
3649          }
3650          if (zink_fs_key_base(key)->coord_replace_bits)
3651             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
3652          if (zink_fs_key_base(key)->point_coord_yinvert)
3653             NIR_PASS_V(nir, invert_point_coord);
3654          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3655             nir_foreach_shader_in_variable(var, nir)
3656                var->data.sample = true;
3657             nir->info.fs.uses_sample_qualifier = true;
3658             nir->info.fs.uses_sample_shading = true;
3659          }
3660          if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3661             NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3662          if (nir->info.fs.uses_fbfetch_output) {
3663             nir_variable *fbfetch = NULL;
3664             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3665             /* old variable must be deleted to avoid spirv errors */
3666             fbfetch->data.mode = nir_var_shader_temp;
3667             nir_fixup_deref_modes(nir);
3668             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3669             need_optimize = true;
3670          }
3671          nir_foreach_shader_in_variable_safe(var, nir) {
3672             if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3673                continue;
3674             nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3675             var->data.mode = nir_var_shader_temp;
3676             nir_fixup_deref_modes(nir);
3677             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3678             need_optimize = true;
3679          }
3680          break;
3681       case MESA_SHADER_COMPUTE:
3682          if (zink_cs_key(key)->robust_access)
3683             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3684          break;
3685       default: break;
3686       }
3687       if (key->base.needs_zs_shader_swizzle) {
3688          assert(extra_data);
3689          NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
3690       }
3691       if (key->base.nonseamless_cube_mask) {
3692          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
3693          need_optimize = true;
3694       }
3695    }
3696    if (screen->driconf.inline_uniforms) {
3697       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3698       NIR_PASS_V(nir, rewrite_bo_access, screen);
3699       NIR_PASS_V(nir, remove_bo_access, zs);
3700       need_optimize = true;
3701    }
3702    if (inlined_uniforms) {
3703       optimize_nir(nir, zs);
3704
3705       /* This must be done again. */
3706       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3707                                                        nir_var_shader_out);
3708
3709       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3710       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
3711          zs->can_inline = false;
3712    } else if (need_optimize)
3713       optimize_nir(nir, zs);
3714    
3715    struct zink_shader_object obj = compile_module(screen, zs, nir, false);
3716    ralloc_free(nir);
3717    return obj.mod;
3718 }
3719
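/* compile a shader for separate (precompile/EXT_shader_object) use: the
 * descriptor set index and binding offsets are remapped to the shader-local
 * layout from zink_descriptor_shader_get_binding_offsets before compiling
 */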
3720 struct zink_shader_object
3721 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
3722 {
3723    nir_shader *nir = zink_shader_deserialize(screen, zs);
3724    int set = nir->info.stage == MESA_SHADER_FRAGMENT;
3725    unsigned offsets[4];
3726    zink_descriptor_shader_get_binding_offsets(zs, offsets);
3727    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
3728       if (var->data.bindless)
3729          continue;
3730       var->data.descriptor_set = set;
3731       switch (var->data.mode) {
3732       case nir_var_mem_ubo:
3733          var->data.binding = !!var->data.driver_location;
3734          break;
3735       case nir_var_uniform:
3736          if (glsl_type_is_sampler(glsl_without_array(var->type)))
3737             var->data.binding += offsets[1];
3738          break;
3739       case nir_var_mem_ssbo:
3740          var->data.binding += offsets[2];
3741          break;
3742       case nir_var_image:
3743          var->data.binding += offsets[3];
3744          break;
3745       default: break;
3746       }
3747    }
3748    if (screen->driconf.inline_uniforms) {
3749       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3750       NIR_PASS_V(nir, rewrite_bo_access, screen);
3751       NIR_PASS_V(nir, remove_bo_access, zs);
3752    }
3753    optimize_nir(nir, zs);
3754    zink_descriptor_shader_init(screen, zs);
3755    struct zink_shader_object obj = compile_module(screen, zs, nir, true);
3756    ralloc_free(nir);
3757    return obj;
3758 }
3759
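/* Vulkan's InstanceIndex includes the base instance while GL's gl_InstanceID
 * does not, so gl_InstanceID reads must subtract load_base_instance
 */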
3760 static bool
3761 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
3762 {
3763    if (instr->type != nir_instr_type_intrinsic)
3764       return false;
3765    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3766    if (intr->intrinsic != nir_intrinsic_load_instance_id)
3767       return false;
3768    b->cursor = nir_after_instr(instr);
3769    nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
3770    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3771    return true;
3772 }
3773
3774 static bool
3775 lower_baseinstance(nir_shader *shader)
3776 {
3777    if (shader->info.stage != MESA_SHADER_VERTEX)
3778       return false;
3779    return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
3780 }
3781
3782 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access,
3783  * so instead we delete all those broken variables and just make new ones
3784  */
3785 static bool
3786 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
3787 {
3788    uint64_t max_ssbo_size = 0;
3789    uint64_t max_ubo_size = 0;
3790    uint64_t max_uniform_size = 0;
3791
3792    if (!shader->info.num_ssbos && !shader->info.num_ubos)
3793       return false;
3794
3795    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
3796       const struct glsl_type *type = glsl_without_array(var->type);
3797       if (type_is_counter(type))
3798          continue;
3799       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
3800       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
3801       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
3802       if (interface_type) {
3803          unsigned block_size = glsl_get_explicit_size(interface_type, true);
3804          if (glsl_get_length(interface_type) == 1) {
3805             /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
3806             const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
3807             if (glsl_type_is_array(f) && !glsl_array_size(f))
3808                block_size = 0;
3809          }
3810          if (block_size) {
3811             block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
3812             size = MAX2(size, block_size);
3813          }
3814       }
3815       if (var->data.mode == nir_var_mem_ubo) {
3816          if (var->data.driver_location)
3817             max_ubo_size = MAX2(max_ubo_size, size);
3818          else
3819             max_uniform_size = MAX2(max_uniform_size, size);
3820       } else {
3821          max_ssbo_size = MAX2(max_ssbo_size, size);
3822          if (interface_type) {
3823             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
3824                needs_size = true;
3825          }
3826       }
3827       var->data.mode = nir_var_shader_temp;
3828    }
3829    nir_fixup_deref_modes(shader);
3830    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3831    optimize_nir(shader, NULL);
3832
3833    struct glsl_struct_field field = {0};
3834    field.name = ralloc_strdup(shader, "base");
3835    if (shader->info.num_ubos) {
3836       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
3837          field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
3838          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3839                                                  glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
3840                                                  "uniform_0@32");
3841          var->interface_type = var->type;
3842          var->data.mode = nir_var_mem_ubo;
3843          var->data.driver_location = 0;
3844       }
3845
3846       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
3847       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
3848       if (num_ubos && ubos_used) {
3849          field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
3850          /* shrink array as much as possible */
3851          unsigned first_ubo = ffs(ubos_used) - 2;
3852          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
3853          num_ubos -= first_ubo;
3854          assert(num_ubos);
3855          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3856                                    glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
3857                                    "ubos@32");
3858          var->interface_type = var->type;
3859          var->data.mode = nir_var_mem_ubo;
3860          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
3861       }
3862    }
3863    if (shader->info.num_ssbos && zs->ssbos_used) {
3864       /* shrink array as much as possible */
3865       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
3866       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
3867       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
3868       assert(num_ssbos);
3869       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
3870       field.type = ssbo_type;
3871       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
3872                                               glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
3873                                               "ssbos@32");
3874       var->interface_type = var->type;
3875       var->data.mode = nir_var_mem_ssbo;
3876       var->data.driver_location = first_ssbo;
3877    }
3878    return true;
3879 }
3880
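/* a constant buffer index marks just that binding as used; indirect indexing
 * conservatively marks them all (minus the default uniform block for ubos,
 * which indirect block indexing can't address)
 */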
3881 static uint32_t
3882 get_src_mask_ssbo(unsigned total, nir_src src)
3883 {
3884    if (nir_src_is_const(src))
3885       return BITFIELD_BIT(nir_src_as_uint(src));
3886    return BITFIELD_MASK(total);
3887 }
3888
3889 static uint32_t
3890 get_src_mask_ubo(unsigned total, nir_src src)
3891 {
3892    if (nir_src_is_const(src))
3893       return BITFIELD_BIT(nir_src_as_uint(src));
3894    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
3895 }
3896
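/* build the ssbos_used/ubos_used masks consumed by unbreak_bos above and
 * mark sampler ranges in textures_used; returns true if get_ssbo_size is
 * used, meaning the rewritten ssbo variables must keep an unsized array
 */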
3897 static bool
3898 analyze_io(struct zink_shader *zs, nir_shader *shader)
3899 {
3900    bool ret = false;
3901    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
3902    nir_foreach_block(block, impl) {
3903       nir_foreach_instr(instr, block) {
3904          if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
3905             /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
3906             nir_tex_instr *tex = nir_instr_as_tex(instr);
3907             nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
3908                if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3909                   unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3910                   if (tex->texture_index >= img->data.driver_location &&
3911                      tex->texture_index < img->data.driver_location + size) {
3912                      BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
3913                      break;
3914                   }
3915                }
3916             }
3917             continue;
3918          }
3919          if (instr->type != nir_instr_type_intrinsic)
3920             continue;
3921
3922          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3923          switch (intrin->intrinsic) {
3924          case nir_intrinsic_store_ssbo:
3925             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
3926             break;
3927
3928          case nir_intrinsic_get_ssbo_size: {
3929             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3930             ret = true;
3931             break;
3932          }
3933          case nir_intrinsic_ssbo_atomic_fadd:
3934          case nir_intrinsic_ssbo_atomic_add:
3935          case nir_intrinsic_ssbo_atomic_imin:
3936          case nir_intrinsic_ssbo_atomic_umin:
3937          case nir_intrinsic_ssbo_atomic_imax:
3938          case nir_intrinsic_ssbo_atomic_umax:
3939          case nir_intrinsic_ssbo_atomic_and:
3940          case nir_intrinsic_ssbo_atomic_or:
3941          case nir_intrinsic_ssbo_atomic_xor:
3942          case nir_intrinsic_ssbo_atomic_exchange:
3943          case nir_intrinsic_ssbo_atomic_comp_swap:
3944          case nir_intrinsic_ssbo_atomic_fmin:
3945          case nir_intrinsic_ssbo_atomic_fmax:
3946          case nir_intrinsic_ssbo_atomic_fcomp_swap:
3947          case nir_intrinsic_load_ssbo:
3948             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3949             break;
3950          case nir_intrinsic_load_ubo:
3951          case nir_intrinsic_load_ubo_vec4:
3952             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
3953             break;
3954          default:
3955             break;
3956          }
3957       }
3958    }
3959    return ret;
3960 }
3961
3962 struct zink_bindless_info {
3963    nir_variable *bindless[4];
3964    unsigned bindless_set;
3965 };
3966
3967 /* this is a "default" bindless texture used if the shader has no texture variables */
3968 static nir_variable *
3969 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
3970 {
3971    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
3972    nir_variable *var;
3973
3974    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
3975    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
3976    var->data.descriptor_set = descriptor_set;
3977    var->data.driver_location = var->data.binding = binding;
3978    return var;
3979 }
3980
3981 /* this is a "default" bindless image used if the shader has no image variables */
3982 static nir_variable *
3983 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
3984 {
3985    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
3986    nir_variable *var;
3987
3988    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
3989    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
3990    var->data.descriptor_set = descriptor_set;
3991    var->data.driver_location = var->data.binding = binding;
3992    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
3993    return var;
3994 }
3995
3996 /* rewrite bindless instructions as array deref instructions */
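/* e.g. (illustrative NIR)
 *    bindless_image_store(handle, ...) ->
 *    image_deref_store(&bindless_image[u2u32(handle)], ...)
 * where bindless_image is one of the shared array variables created above
 */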
3997 static bool
3998 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
3999 {
4000    struct zink_bindless_info *bindless = data;
4001
4002    if (in->type == nir_instr_type_tex) {
4003       nir_tex_instr *tex = nir_instr_as_tex(in);
4004       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4005       if (idx == -1)
4006          return false;
4007
4008       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4009       if (!var)
4010          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4011       b->cursor = nir_before_instr(in);
4012       nir_deref_instr *deref = nir_build_deref_var(b, var);
4013       if (glsl_type_is_array(var->type))
4014          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4015       nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
4016
4017       /* bindless sampling uses the variable type directly, so the tex instr has to match it
4018        * exactly, unlike normal sampler ops where things are a bit more flexible; this means a
4019        * shader can be passed with a sampler2DArray while the tex instr only has 2 coord
4020        * components, which breaks spirv compilation even though it triggers no validation errors
4021        *
4022        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4023        * - Warhammer 40k: Dawn of War III
4024        */
4025       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4026       unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4027       unsigned coord_components = nir_src_num_components(tex->src[c].src);
4028       if (coord_components < needed_components) {
4029          nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4030          nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
4031          tex->coord_components = needed_components;
4032       }
4033       return true;
4034    }
4035    if (in->type != nir_instr_type_intrinsic)
4036       return false;
4037    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4038
4039    nir_intrinsic_op op;
4040 #define OP_SWAP(OP) \
4041    case nir_intrinsic_bindless_image_##OP: \
4042       op = nir_intrinsic_image_deref_##OP; \
4043       break;
4044
4045
4046    /* convert bindless intrinsics to deref intrinsics */
4047    switch (instr->intrinsic) {
4048    OP_SWAP(atomic_add)
4049    OP_SWAP(atomic_and)
4050    OP_SWAP(atomic_comp_swap)
4051    OP_SWAP(atomic_dec_wrap)
4052    OP_SWAP(atomic_exchange)
4053    OP_SWAP(atomic_fadd)
4054    OP_SWAP(atomic_fmax)
4055    OP_SWAP(atomic_fmin)
4056    OP_SWAP(atomic_imax)
4057    OP_SWAP(atomic_imin)
4058    OP_SWAP(atomic_inc_wrap)
4059    OP_SWAP(atomic_or)
4060    OP_SWAP(atomic_umax)
4061    OP_SWAP(atomic_umin)
4062    OP_SWAP(atomic_xor)
4063    OP_SWAP(format)
4064    OP_SWAP(load)
4065    OP_SWAP(order)
4066    OP_SWAP(samples)
4067    OP_SWAP(size)
4068    OP_SWAP(store)
4069    default:
4070       return false;
4071    }
4072
4073    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4074    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4075    if (!var)
4076       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4077    instr->intrinsic = op;
4078    b->cursor = nir_before_instr(in);
4079    nir_deref_instr *deref = nir_build_deref_var(b, var);
4080    if (glsl_type_is_array(var->type))
4081       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4082    nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
4083    return true;
4084 }
4085
4086 static bool
4087 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4088 {
4089    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4090       return false;
4091    nir_fixup_deref_modes(shader);
4092    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4093    optimize_nir(shader, NULL);
4094    return true;
4095 }
4096
4097 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4098 static bool
4099 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
4100 {
4101    if (in->type != nir_instr_type_intrinsic)
4102       return false;
4103    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4104    if (instr->intrinsic != nir_intrinsic_load_deref &&
4105        instr->intrinsic != nir_intrinsic_store_deref)
4106       return false;
4107
4108    nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
4109    nir_variable *var = nir_deref_instr_get_variable(src_deref);
4110    if (var->data.bindless)
4111       return false;
4112    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4113       return false;
4114    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4115       return false;
4116
4117    var->type = glsl_int64_t_type();
4118    var->data.bindless = 1;
4119    b->cursor = nir_before_instr(in);
4120    nir_deref_instr *deref = nir_build_deref_var(b, var);
4121    if (instr->intrinsic == nir_intrinsic_load_deref) {
4122       nir_ssa_def *def = nir_load_deref(b, deref);
4123       nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
4124       nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
4125    } else {
4126       nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
4127    }
4128    nir_instr_remove(in);
4129    nir_instr_remove(&src_deref->instr);
4130    return true;
4131 }
4132
4133 static bool
4134 lower_bindless_io(nir_shader *shader)
4135 {
4136    return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
4137 }
4138
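/* a rough sketch of the flat binding layout this computes for gfx stages
 * (compute clamps base to 0, kernel uses its own flat ranges below):
 *    ubo:     base * 2 + !!index
 *    sampler: base * PIPE_MAX_SAMPLERS + index
 *    ssbo:    base + (compact_descriptors ? ZINK_GFX_SHADER_COUNT * 2 : 0)
 *    image:   base * ZINK_MAX_SHADER_IMAGES + index
 *             + (compact_descriptors ? ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS : 0)
 */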
4139 static uint32_t
4140 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4141 {
4142    if (stage == MESA_SHADER_NONE) {
4143       unreachable("not supported");
4144    } else {
4145       unsigned base = stage;
4146       /* clamp compute bindings for better driver efficiency */
4147       if (gl_shader_stage_is_compute(stage))
4148          base = 0;
4149       switch (type) {
4150       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4151       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4152          return base * 2 + !!index;
4153
4154       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4155          assert(stage == MESA_SHADER_KERNEL);
4156          FALLTHROUGH;
4157       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4158          if (stage == MESA_SHADER_KERNEL) {
4159             assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4160             return index + PIPE_MAX_SAMPLERS;
4161          }
4162          FALLTHROUGH;
4163       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4164          assert(index < PIPE_MAX_SAMPLERS);
4165          assert(stage != MESA_SHADER_KERNEL);
4166          return (base * PIPE_MAX_SAMPLERS) + index;
4167
4168       case VK_DESCRIPTOR_TYPE_SAMPLER:
4169          assert(index < PIPE_MAX_SAMPLERS);
4170          assert(stage == MESA_SHADER_KERNEL);
4171          return index;
4172
4173       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4174          return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4175
4176       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4177       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4178          assert(index < ZINK_MAX_SHADER_IMAGES);
4179          if (stage == MESA_SHADER_KERNEL)
4180             return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4181          return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4182
4183       default:
4184          unreachable("unexpected type");
4185       }
4186    }
4187 }
4188
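/* route a bindless sampler/image variable into one of the four shared
 * bindless array variables (creating it on first use), then demote the
 * original variable to a temp so it gets deleted
 */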
4189 static void
4190 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4191 {
4192    if (glsl_type_is_struct(type)) {
4193       for (unsigned i = 0; i < glsl_get_length(type); i++)
4194          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4195       return;
4196    }
4197
4198    /* just a random scalar in a struct */
4199    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4200       return;
4201
4202    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4203    unsigned binding;
4204    switch (vktype) {
4205       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4206          binding = 0;
4207          break;
4208       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4209          binding = 1;
4210          break;
4211       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4212          binding = 2;
4213          break;
4214       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4215          binding = 3;
4216          break;
4217       default:
4218          unreachable("unknown");
4219    }
4220    if (!bindless->bindless[binding]) {
4221       bindless->bindless[binding] = nir_variable_clone(var, nir);
4222       bindless->bindless[binding]->data.bindless = 0;
4223       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4224       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4225       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4226       if (!bindless->bindless[binding]->data.image.format)
4227          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4228       nir_shader_add_variable(nir, bindless->bindless[binding]);
4229    } else {
4230       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4231    }
4232    var->data.mode = nir_var_shader_temp;
4233 }
4234
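/* drivers without 1D depth format support (screen->need_2D_zs) sample these
 * as 2D instead: pad the coord/offset/derivative srcs with a zero y
 * component, then shrink the dest back to the components the shader read
 */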
4235 static bool
4236 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4237 {
4238    struct zink_screen *screen = data;
4239    if (instr->type != nir_instr_type_tex)
4240       return false;
4241    nir_tex_instr *tex = nir_instr_as_tex(instr);
4242    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4243       return false;
4244    if (tex->is_sparse && screen->need_2D_sparse) {
4245       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4246       mesa_loge("unhandled/unsupported 1D sparse texture!");
4247       abort();
4248    }
4249    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4250    b->cursor = nir_before_instr(instr);
4251    tex->coord_components++;
4252    unsigned srcs[] = {
4253       nir_tex_src_coord,
4254       nir_tex_src_offset,
4255       nir_tex_src_ddx,
4256       nir_tex_src_ddy,
4257    };
4258    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4259       unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4260       if (c == -1)
4261          continue;
4262       if (tex->src[c].src.ssa->num_components == tex->coord_components)
4263          continue;
4264       nir_ssa_def *def;
4265       nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4266       if (tex->src[c].src.ssa->num_components == 1)
4267          def = nir_vec2(b, tex->src[c].src.ssa, zero);
4268       else
4269          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4270       nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
4271    }
4272    b->cursor = nir_after_instr(instr);
4273    unsigned needed_components = nir_tex_instr_dest_size(tex);
4274    unsigned num_components = tex->dest.ssa.num_components;
4275    if (needed_components > num_components) {
4276       tex->dest.ssa.num_components = needed_components;
4277       assert(num_components < 3);
4278       /* take either xz or just x since this is promoted to 2D from 1D */
4279       uint32_t mask = num_components == 2 ? (1|4) : 1;
4280       nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
4281       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
4282    }
4283    return true;
4284 }
4285
4286 static bool
4287 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4288 {
4289    bool found = false;
4290    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4291       const struct glsl_type *type = glsl_without_array(var->type);
4292       unsigned length = glsl_get_length(var->type);
4293       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4294          continue;
4295       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4296       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4297
4298       found = true;
4299    }
4300    if (found)
4301       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4302    return found;
4303 }
4304
4305 static void
4306 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4307 {
4308    nir_foreach_function(function, shader) {
4309       if (!function->impl)
4310          continue;
4311       nir_foreach_block_safe(block, function->impl) {
4312          nir_foreach_instr_safe(instr, block) {
4313             if (instr->type == nir_instr_type_tex) {
4314                nir_tex_instr *tex = nir_instr_as_tex(instr);
4315                zs->sinfo.have_sparse |= tex->is_sparse;
4316             }
4317             if (instr->type != nir_instr_type_intrinsic)
4318                continue;
4319             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4320             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4321                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4322                 intr->intrinsic == nir_intrinsic_image_deref_store ||
4323                 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4324                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4325                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4326                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4327                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4328                 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4329                 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4330                 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4331                 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4332                 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4333                 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4334                 intr->intrinsic == nir_intrinsic_image_deref_size ||
4335                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4336                 intr->intrinsic == nir_intrinsic_image_deref_format ||
4337                 intr->intrinsic == nir_intrinsic_image_deref_order) {
4338
4339                 nir_variable *var =
4340                    nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
4341
4342                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4343                 const unsigned size =
4344                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4345                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4346                                  var->data.binding + (MAX2(size, 1) - 1));
4347             }
4348             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4349                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4350                zs->sinfo.have_sparse = true;
4351
4352             static bool warned = false;
4353             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4354                switch (intr->intrinsic) {
4355                case nir_intrinsic_image_deref_atomic_add: {
4356                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
4357                   if (util_format_is_float(var->data.image.format)) {
4358                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4359                      warned = true;
4360                   }
4361                   break;
4360                }
4361                default:
4362                   break;
4363                }
4364             }
4365          }
4366       }
4367    }
4368 }
4369
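/* walk up the alu chain feeding a sparse residency value: if it terminates
 * at a tex instr this is still a raw residency code, if it terminates at
 * is_sparse_texels_resident it has already been resolved to a bool
 */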
4370 static bool
4371 is_residency_code(nir_ssa_def *src)
4372 {
4373    nir_instr *parent = src->parent_instr;
4374    while (1) {
4375       if (parent->type == nir_instr_type_intrinsic) {
4376          ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4377          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4378          return false;
4379       }
4380       if (parent->type == nir_instr_type_tex)
4381          return true;
4382       assert(parent->type == nir_instr_type_alu);
4383       nir_alu_instr *alu = nir_instr_as_alu(parent);
4384       parent = alu->src[0].src.ssa->parent_instr;
4385    }
4386 }
4387
4388 static bool
4389 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
4390 {
4391    if (in->type != nir_instr_type_intrinsic)
4392       return false;
4393    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4394    if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
4395       b->cursor = nir_before_instr(&instr->instr);
4396       nir_ssa_def *src0;
4397       if (is_residency_code(instr->src[0].ssa))
4398          src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
4399       else
4400          src0 = instr->src[0].ssa;
4401       nir_ssa_def *src1;
4402       if (is_residency_code(instr->src[1].ssa))
4403          src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
4404       else
4405          src1 = instr->src[1].ssa;
4406       nir_ssa_def *def = nir_iand(b, src0, src1);
4407       nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
4408       nir_instr_remove(in);
4409       return true;
4410    }
4411    if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
4412       return false;
4413
4414    /* vulkan vec can only be a vec4, but this is (maybe) vec5,
4415     * so just rewrite as the first component since ntv is going to use a different
4416     * method for storing the residency value anyway
4417     */
4418    b->cursor = nir_before_instr(&instr->instr);
4419    nir_instr *parent = instr->src[0].ssa->parent_instr;
4420    if (is_residency_code(instr->src[0].ssa)) {
4421       assert(parent->type == nir_instr_type_alu);
4422       nir_alu_instr *alu = nir_instr_as_alu(parent);
4423       nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
4424       nir_instr_remove(parent);
4425    } else {
4426       nir_ssa_def *src;
4427       if (parent->type == nir_instr_type_intrinsic) {
4428          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4429          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4430          src = intr->src[0].ssa;
4431       } else {
4432          assert(parent->type == nir_instr_type_alu);
4433          nir_alu_instr *alu = nir_instr_as_alu(parent);
4434          src = alu->src[0].src.ssa;
4435       }
4436       if (instr->dest.ssa.bit_size != 32) {
4437          if (instr->dest.ssa.bit_size == 1)
4438             src = nir_ieq_imm(b, src, 1);
4439          else
4440             src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
4441       }
4442       nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
4443       nir_instr_remove(in);
4444    }
4445    return true;
4446 }
4447
4448 static bool
4449 lower_sparse(nir_shader *shader)
4450 {
4451    return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
4452 }
4453
4454 static bool
4455 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4456 {
4457    if (in->type != nir_instr_type_tex)
4458       return false;
4459    nir_tex_instr *tex = nir_instr_as_tex(in);
4460    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4461       return false;
4462    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4463    nir_variable *var = NULL;
4464    if (handle != -1) {
4465       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4466    } else {
4467       nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4468          if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4469             unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4470             if (tex->texture_index >= img->data.driver_location &&
4471                 tex->texture_index < img->data.driver_location + size) {
4472                var = img;
4473                break;
4474             }
4475          }
4476       }
4477    }
4478    return !!rewrite_tex_dest(b, tex, var, data);
4479 }
4480
4481 static bool
4482 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4483 {
4484    return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
4485 }
4486
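/* scalarize multi-component bitfield ops; presumably needed because SPIR-V's
 * OpBitFieldInsert/OpBitField*Extract take scalar Offset/Count operands, so
 * per-component swizzles on those srcs can't be expressed directly
 */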
4487 static bool
4488 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4489 {
4490    if (in->type != nir_instr_type_alu)
4491       return false;
4492    nir_alu_instr *alu = nir_instr_as_alu(in);
4493    switch (alu->op) {
4494    case nir_op_ubitfield_extract:
4495    case nir_op_ibitfield_extract:
4496    case nir_op_bitfield_insert:
4497       break;
4498    default:
4499       return false;
4500    }
4501    unsigned num_components = nir_dest_num_components(alu->dest.dest);
4502    if (num_components == 1)
4503       return false;
4504    b->cursor = nir_before_instr(in);
4505    nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
4506    for (unsigned i = 0; i < num_components; i++) {
4507       if (alu->op == nir_op_bitfield_insert)
4508          dests[i] = nir_bitfield_insert(b,
4509                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4510                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4511                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4512                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4513       else if (alu->op == nir_op_ubitfield_extract)
4514          dests[i] = nir_ubitfield_extract(b,
4515                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4516                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4517                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4518       else
4519          dests[i] = nir_ibitfield_extract(b,
4520                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4521                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4522                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4523    }
4524    nir_ssa_def *dest = nir_vec(b, dests, num_components);
4525    nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
4526    nir_instr_remove(in);
4527    return true;
4528 }
4529
4530
4531 static bool
4532 split_bitfields(nir_shader *shader)
4533 {
4534    return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
4535 }
4536
4537 static void
4538 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4539 {
4540    nir_foreach_function(function, nir) {
4541       nir_foreach_block(block, function->impl) {
4542          nir_foreach_instr_safe(instr, block) {
4543             if (instr->type != nir_instr_type_deref)
4544                continue;
4545             nir_deref_instr *deref = nir_instr_as_deref(instr);
4546             nir_variable *img = nir_deref_instr_get_variable(deref);
4547             if (img != var)
4548                continue;
4549             if (glsl_type_is_array(var->type)) {
4550                if (deref->deref_type == nir_deref_type_array)
4551                   deref->type = glsl_without_array(var->type);
4552                else
4553                   deref->type = var->type;
4554             } else {
4555                deref->type = var->type;
4556             }
4557          }
4558       }
4559    }
4560 }
4561
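/* CL images start out untyped (GLSL_TYPE_VOID sampler result type); derive a
 * concrete image type from the first typed intrinsic that accesses the
 * variable, fall back to uint for size-query-only images, and demote images
 * that are never accessed to temps
 */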
4562 static void
4563 type_image(nir_shader *nir, nir_variable *var)
4564 {
4565    nir_foreach_function(function, nir) {
4566       nir_foreach_block(block, function->impl) {
4567          nir_foreach_instr_safe(instr, block) {
4568             if (instr->type != nir_instr_type_intrinsic)
4569                continue;
4570             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4571             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4572                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4573                intr->intrinsic == nir_intrinsic_image_deref_store ||
4574                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4575                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4576                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4577                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4578                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4579                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4580                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4581                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4582                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4583                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4584                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4585                intr->intrinsic == nir_intrinsic_image_deref_samples ||
4586                intr->intrinsic == nir_intrinsic_image_deref_format ||
4587                intr->intrinsic == nir_intrinsic_image_deref_order) {
4588                nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4589                nir_variable *img = nir_deref_instr_get_variable(deref);
4590                if (img != var)
4591                   continue;
4592                nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4593                const struct glsl_type *type = glsl_without_array(var->type);
4594                if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4595                   assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4596                   continue;
4597                }
4598                const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4599                if (glsl_type_is_array(var->type))
4600                   img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4601                var->type = img_type;
4602                rewrite_cl_derefs(nir, var);
4603                return;
4604             }
4605          }
4606       }
4607    }
4608    nir_foreach_function(function, nir) {
4609       nir_foreach_block(block, function->impl) {
4610          nir_foreach_instr_safe(instr, block) {
4611             if (instr->type != nir_instr_type_intrinsic)
4612                continue;
4613             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4614             if (intr->intrinsic != nir_intrinsic_image_deref_size)
4615                continue;
4616             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4617             nir_variable *img = nir_deref_instr_get_variable(deref);
4618             if (img != var)
4619                continue;
4620             nir_alu_type alu_type = nir_type_uint32;
4621             const struct glsl_type *type = glsl_without_array(var->type);
4622             if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4623                continue;
4624             }
4625             const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4626             if (glsl_type_is_array(var->type))
4627                img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4628             var->type = img_type;
4629             rewrite_cl_derefs(nir, var);
4630             return;
4631          }
4632       }
4633    }
4634    var->data.mode = nir_var_shader_temp;
4635 }
4636
4637 static nir_variable *
4638 find_sampler_var(nir_shader *nir, unsigned texture_index)
4639 {
4640    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4641       unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4642       if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
4643           (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
4644          return var;
4645    }
4646    return NULL;
4647 }
4648
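/* same idea for CL sampler views: type each sampler variable from the tex
 * instrs that use it, visiting real sampling ops before query ops (txs etc)
 * so a query can't pick the type first, and record the used sampler slots
 */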
4649 static bool
4650 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4651 {
4652    bool progress = false;
4653    nir_foreach_function(function, nir) {
4654       nir_foreach_block(block, function->impl) {
4655          nir_foreach_instr(instr, block) {
4656             if (instr->type != nir_instr_type_tex)
4657                continue;
4658             nir_tex_instr *tex = nir_instr_as_tex(instr);
4659             switch (tex->op) {
4660             case nir_texop_lod:
4661             case nir_texop_txs:
4662             case nir_texop_query_levels:
4663             case nir_texop_texture_samples:
4664             case nir_texop_samples_identical:
4665                continue;
4666             default:
4667                break;
4668             }
4669             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4670             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4671             assert(var);
4672             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4673                continue;
4674             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4675             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4676             if (size > 1)
4677                img_type = glsl_array_type(img_type, size, 0);
4678             var->type = img_type;
4679             progress = true;
4680          }
4681       }
4682    }
4683    nir_foreach_function(function, nir) {
4684       nir_foreach_block(block, function->impl) {
4685          nir_foreach_instr(instr, block) {
4686             if (instr->type != nir_instr_type_tex)
4687                continue;
4688             nir_tex_instr *tex = nir_instr_as_tex(instr);
4689             switch (tex->op) {
4690             case nir_texop_lod:
4691             case nir_texop_txs:
4692             case nir_texop_query_levels:
4693             case nir_texop_texture_samples:
4694             case nir_texop_samples_identical:
4695                break;
4696             default:
4697                continue;
4698             }
4699             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4700             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4701             assert(var);
4702             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4703                continue;
4704             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4705             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4706             if (size > 1)
4707                img_type = glsl_array_type(img_type, size, 0);
4708             var->type = img_type;
4709             progress = true;
4710          }
4711       }
4712    }
4713    return progress;
4714 }
4715
4716 static bool
4717 delete_samplers(nir_shader *nir)
4718 {
4719    bool progress = false;
4720    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4721       if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4722          var->data.mode = nir_var_shader_temp;
4723          progress = true;
4724       }
4725    }
4726    return progress;
4727 }
4728
4729 static bool
4730 type_images(nir_shader *nir, unsigned *sampler_mask)
4731 {
4732    bool progress = false;
4733    progress |= delete_samplers(nir);
4734    progress |= type_sampler_vars(nir, sampler_mask);
4735    nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4736       type_image(nir, var);
4737       progress = true;
4738    }
4739    return progress;
4740 }
4741
4742 /* attempt to assign io for separate shaders */
4743 static bool
4744 fixup_io_locations(nir_shader *nir)
4745 {
4746    nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4747    /* i/o interface blocks are required to be EXACT matches between stages:
4748     * iterate over all locations and set locations incrementally
4749     */
4750    unsigned slot = 0;
4751    for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4752       if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4753          continue;
4754       nir_variable *var = nir_find_variable_with_location(nir, mode, i);
4755       if (!var) {
4756          /* locations used between stages are not required to be contiguous */
4757          if (i >= VARYING_SLOT_VAR0)
4758             slot++;
4759          continue;
4760       }
4761       unsigned size;
4762       /* ensure variable is given enough slots */
4763       if (nir_is_arrayed_io(var, nir->info.stage))
4764          size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4765       else
4766          size = glsl_count_vec4_slots(var->type, false, false);
4767       var->data.driver_location = slot;
4768       slot += size;
4769       /* ensure the consumed slots aren't double iterated */
4770       i += size - 1;
4771    }
4772    return true;
4773 }
4774
4775 static uint32_t
4776 zink_flat_flags(struct nir_shader *shader)
4777 {
4778    uint32_t flat_flags = 0, c = 0;
4779    nir_foreach_shader_in_variable(var, shader) {
4780       if (var->data.interpolation == INTERP_MODE_FLAT)
4781          flat_flags |= 1u << (c++);
4782    }
4783
4784    return flat_flags;
4785 }
4786
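/* entrypoint for shader creation: runs the zink-specific lowering passes
 * above, assigns vulkan descriptor set/binding info, and serializes the
 * result into ret->blob for later compiles
 */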
4787 struct zink_shader *
4788 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4789                    const struct pipe_stream_output_info *so_info)
4790 {
4791    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4792    bool have_psiz = false;
4793
4794    ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4795                         nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4796
4797    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4798    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4799
4800    util_queue_fence_init(&ret->precompile.fence);
4801    util_dynarray_init(&ret->pipeline_libs, ret);
4802    ret->hash = _mesa_hash_pointer(ret);
4803
4804    ret->programs = _mesa_pointer_set_create(NULL);
4805    simple_mtx_init(&ret->lock, mtx_plain);
4806
4807    nir_variable_mode indirect_derefs_modes = 0;
4808    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4809        nir->info.stage == MESA_SHADER_TESS_EVAL)
4810       indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4811
4812    NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
4813               UINT32_MAX);
4814
4815    if (nir->info.stage < MESA_SHADER_COMPUTE)
4816       create_gfx_pushconst(nir);
4817
4818    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4819             nir->info.stage == MESA_SHADER_TESS_EVAL)
4820       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4821
4822    if (nir->info.stage < MESA_SHADER_FRAGMENT)
4823       have_psiz = check_psiz(nir);
4824    if (nir->info.stage == MESA_SHADER_FRAGMENT)
4825       ret->flat_flags = zink_flat_flags(nir);
4826
4827    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4828       NIR_PASS_V(nir, fixup_io_locations);
4829
4830    NIR_PASS_V(nir, lower_basevertex);
4831    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4832    NIR_PASS_V(nir, lower_baseinstance);
4833    NIR_PASS_V(nir, lower_sparse);
4834    NIR_PASS_V(nir, split_bitfields);
4835    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4836
4837    if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4838       NIR_PASS_V(nir, nir_lower_discard_or_demote,
4839                  screen->driconf.glsl_correct_derivatives_after_discard ||
4840                  nir->info.use_legacy_math_rules);
4841    }
4842
4843    if (screen->need_2D_zs)
4844       NIR_PASS_V(nir, lower_1d_shadow, screen);
4845
4846    {
4847       nir_lower_subgroups_options subgroup_options = {0};
4848       subgroup_options.lower_to_scalar = true;
4849       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4850       subgroup_options.ballot_bit_size = 32;
4851       subgroup_options.ballot_components = 4;
4852       subgroup_options.lower_subgroup_masks = true;
4853       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4854          subgroup_options.subgroup_size = 1;
4855          subgroup_options.lower_vote_trivial = true;
4856       }
4857       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4858    }
4859
4860    if (so_info && so_info->num_outputs)
4861       NIR_PASS_V(nir, split_blocks);
4862
4863    optimize_nir(nir, NULL);
4864    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4865    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4866                                           nir_lower_demote_if_to_cf |
4867                                           nir_lower_terminate_if_to_cf));
4868    NIR_PASS_V(nir, nir_lower_fragcolor,
4869          nir->info.fs.color_is_dual_source ? 1 : 8);
4870    NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4871    bool needs_size = analyze_io(ret, nir);
4872    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4873    /* run in compile if there could be inlined uniforms */
4874    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4875       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
4876       NIR_PASS_V(nir, rewrite_bo_access, screen);
4877       NIR_PASS_V(nir, remove_bo_access, ret);
4878    }
4879
4880    if (zink_debug & ZINK_DEBUG_NIR) {
4881       fprintf(stderr, "NIR shader:\n---8<---\n");
4882       nir_print_shader(nir, stderr);
4883       fprintf(stderr, "---8<---\n");
4884    }
4885
4886    struct zink_bindless_info bindless = {0};
4887    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4888    bool has_bindless_io = false;
4889    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4890       var->data.is_xfb = false;
4891       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4892          has_bindless_io = true;
4893          break;
4894       }
4895    }
4896    if (has_bindless_io)
4897       NIR_PASS_V(nir, lower_bindless_io);
4898
4899    optimize_nir(nir, NULL);
4900    prune_io(nir);
4901
4902    scan_nir(screen, nir, ret);
4903    unsigned sampler_mask = 0;
4904    if (nir->info.stage == MESA_SHADER_KERNEL) {
4905       NIR_PASS_V(nir, type_images, &sampler_mask);
4906       enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4907       VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4908       u_foreach_bit(s, sampler_mask) {
4909          ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4910          ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4911          ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4912          ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4913          ret->num_bindings[ztype]++;
4914       }
4915       ret->sinfo.sampler_mask = sampler_mask;
4916    }
4917
4918    unsigned ubo_binding_mask = 0;
4919    unsigned ssbo_binding_mask = 0;
4920    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4921       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4922                                         nir_var_image |
4923                                         nir_var_mem_ubo |
4924                                         nir_var_mem_ssbo)) {
4925          enum zink_descriptor_type ztype;
4926          const struct glsl_type *type = glsl_without_array(var->type);
4927          if (var->data.mode == nir_var_mem_ubo) {
4928             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4929             /* buffer 0 is a push descriptor */
4930             var->data.descriptor_set = !!var->data.driver_location;
4931             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
4932                                 zink_binding(nir->info.stage,
4933                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
4934                                              var->data.driver_location,
4935                                              screen->compact_descriptors);
4936             assert(var->data.driver_location || var->data.binding < 10);
4937             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
4938             int binding = var->data.binding;
4939
4940             if (!var->data.driver_location) {
4941                ret->has_uniforms = true;
4942             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
4943                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4944                ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
4945                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4946                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4947                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4948                ret->num_bindings[ztype]++;
4949                ubo_binding_mask |= BITFIELD_BIT(binding);
4950             }
4951          } else if (var->data.mode == nir_var_mem_ssbo) {
4952             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
4953             var->data.descriptor_set = screen->desc_set_id[ztype];
4954             var->data.binding = zink_binding(nir->info.stage,
4955                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4956                                              var->data.driver_location,
4957                                              screen->compact_descriptors);
4958             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
4959                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4960                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4961                ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
4962                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4963                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4964                ret->num_bindings[ztype]++;
4965                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
4966             }
4967          } else {
4968             assert(var->data.mode == nir_var_uniform ||
4969                    var->data.mode == nir_var_image);
4970             if (var->data.bindless) {
4971                ret->bindless = true;
4972                handle_bindless_var(nir, var, type, &bindless);
4973             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
4974                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4975                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4976                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
4977                ztype = zink_desc_type_from_vktype(vktype);
4978                if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
4979                   ret->num_texel_buffers++;
4980                var->data.driver_location = var->data.binding;
4981                var->data.descriptor_set = screen->desc_set_id[ztype];
4982                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
4983                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4984                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4985                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4986                if (glsl_type_is_array(var->type))
4987                   ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
4988                else
4989                   ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4990                ret->num_bindings[ztype]++;
4991             } else if (var->data.mode == nir_var_uniform) {
4992                /* this is a dead uniform */
4993                var->data.mode = 0;
4994                exec_node_remove(&var->node);
4995             }
4996          }
4997       }
4998    }
4999    bool bindless_lowered = false;
5000    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
5001    ret->bindless |= bindless_lowered;
5002
5003    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
5004       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
5005    if (nir->info.stage != MESA_SHADER_KERNEL)
5006       NIR_PASS_V(nir, match_tex_dests, ret);
5007
5008    if (!nir->info.internal)
5009       nir_foreach_shader_out_variable(var, nir)
5010          var->data.explicit_xfb_buffer = 0;
5011    if (so_info && so_info->num_outputs)
5012       update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
5013    else if (have_psiz) {
5014       bool have_fake_psiz = false;
5015       nir_variable *psiz = NULL;
5016       nir_foreach_shader_out_variable(var, nir) {
5017          if (var->data.location == VARYING_SLOT_PSIZ) {
5018             if (!var->data.explicit_location)
5019                have_fake_psiz = true;
5020             else
5021                psiz = var;
5022          }
5023       }
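           /* if an injected (fake) psiz coexists with an explicitly-located one,
            * drop the explicit variable so only one PSIZ export remains */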
5024       if (have_fake_psiz && psiz) {
5025          psiz->data.mode = nir_var_shader_temp;
5026          nir_fixup_deref_modes(nir);
5027          NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
5028       }
5029    }
5030    zink_shader_serialize_blob(nir, &ret->blob);
5031    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5032
5033    ret->can_inline = true;
5034
5035    return ret;
5036 }
5037
5038 char *
5039 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
5040 {
5041    struct zink_screen *screen = zink_screen(pscreen);
5042    nir_shader *nir = nirptr;
5043
5044    nir_lower_tex_options tex_opts = {
5045       .lower_invalid_implicit_lod = true,
5046    };
5047    /*
5048       Sampled Image must be an object whose type is OpTypeSampledImage.
5049       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
5050       or Rect, and the Arrayed and MS operands must be 0.
5051       - SPIR-V spec, OpImageSampleProj* opcodes
5052     */
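        /* hence: lower projective coords for the dims the spec disallows */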
5053    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
5054                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
5055    tex_opts.lower_txp_array = true;
5056    if (!screen->info.feats.features.shaderImageGatherExtended)
5057       tex_opts.lower_tg4_offsets = true;
5058    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
5059    optimize_nir(nir, NULL);
5060    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
5061    if (screen->driconf.inline_uniforms)
5062       nir_find_inlinable_uniforms(nir);
5063
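        /* the finalize_nir hook returns an error string; NULL means success */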
5064    return NULL;
5065 }
5066
5067 void
5068 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5069 {
5070    _mesa_set_destroy(shader->programs, NULL);
5071    util_queue_fence_wait(&shader->precompile.fence);
5072    util_queue_fence_destroy(&shader->precompile.fence);
5073    zink_descriptor_shader_deinit(screen, shader);
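        /* the precompiled object is either a VkShaderEXT (with shader objects)
         * or a shader module plus an optional graphics pipeline library */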
5074    if (screen->info.have_EXT_shader_object) {
5075       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
5076    } else {
5077       if (shader->precompile.obj.mod)
5078          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
5079       if (shader->precompile.gpl)
5080          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
5081    }
5082    blob_finish(&shader->blob);
5083    ralloc_free(shader->spirv);
5084    free(shader->precompile.bindings);
5085    ralloc_free(shader);
5086 }
5087
5088 void
5089 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5090 {
5091    assert(shader->info.stage != MESA_SHADER_COMPUTE);
5092    util_queue_fence_wait(&shader->precompile.fence);
5093    set_foreach(shader->programs, entry) {
5094       struct zink_gfx_program *prog = (void*)entry->key;
5095       gl_shader_stage stage = shader->info.stage;
5096       assert(stage < ZINK_GFX_SHADER_COUNT);
5097       unsigned stages_present = prog->stages_present;
5098       if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
5099             prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
5100          stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
5101       unsigned idx = zink_program_cache_stages(stages_present);
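           /* unlink the program from the context's cache so new draws can't find it */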
5102       if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
5103           (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
5104          struct hash_table *ht = &prog->ctx->program_cache[idx];
5105          simple_mtx_lock(&prog->ctx->program_lock[idx]);
5106          struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
5107          assert(he && he->data == prog);
5108          _mesa_hash_table_remove(ht, he);
5109          prog->base.removed = true;
5110          simple_mtx_unlock(&prog->ctx->program_lock[idx]);
5111          util_queue_fence_wait(&prog->base.cache_fence);
5112
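              /* wait for any in-flight async pipeline compiles that may still
               * reference this program */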
5113          for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
5114             for (unsigned i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
5115                hash_table_foreach(&prog->pipelines[r][i], entry) {
5116                   struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
5117
5118                   util_queue_fence_wait(&pc_entry->fence);
5119                }
5120             }
5121          }
5122
5123       }
5124       while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
5125          struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
5126          //skip libs that lack any stage in stages_present, i.e., require
5127          //stages_present to be a subset of libs->stages_present
5128          if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
5129             continue;
5130          if (!libs->removed) {
5131             libs->removed = true;
5132             simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
5133             _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
5134             simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
5135          }
5136          zink_gfx_lib_cache_unref(screen, libs);
5137       }
5138       if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
5139          prog->shaders[stage] = NULL;
5140          prog->stages_remaining &= ~BITFIELD_BIT(stage);
5141       }
5142       /* only remove generated tcs during parent tes destruction */
5143       if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
5144          prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
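           /* also drop any passthrough gs that was generated from this shader */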
5145       if (stage != MESA_SHADER_FRAGMENT &&
5146           prog->shaders[MESA_SHADER_GEOMETRY] &&
5147           prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
5148           shader) {
5149          prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
5150       }
5151       zink_gfx_program_reference(screen, &prog, NULL);
5152    }
5153    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
5154        shader->non_fs.generated_tcs) {
5155       /* automatically destroy generated tcs shaders when tes is destroyed */
5156       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
5157       shader->non_fs.generated_tcs = NULL;
5158    }
5159    for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
5160       for (unsigned j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
5161          if (shader->info.stage != MESA_SHADER_FRAGMENT &&
5162              shader->non_fs.generated_gs[i][j]) {
5163             /* automatically destroy generated gs shaders when their owner is destroyed */
5164             zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
5165             shader->non_fs.generated_gs[i][j] = NULL;
5166          }
5167       }
5168    }
5169    zink_shader_free(screen, shader);
5170 }
5171
5172
5173 struct zink_shader_object
5174 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
5175 {
5176    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
5177    /* shortcut all the nir passes since we just have to change this one word */
5178    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
5179    return zink_shader_spirv_compile(screen, zs, NULL, false);
5180 }
5181
5182 /* creating a passthrough tcs shader that's roughly:
5183
5184 #version 150
5185 #extension GL_ARB_tessellation_shader : require
5186
5187 in vec4 some_var[gl_MaxPatchVertices];
5188 out vec4 some_var_out;
5189
5190 layout(push_constant) uniform tcsPushConstants {
5191     layout(offset = 0) float TessLevelInner[2];
5192     layout(offset = 8) float TessLevelOuter[4];
5193 } u_tcsPushConstants;
5194 layout(vertices = $vertices_per_patch) out;
5195 void main()
5196 {
5197   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
5198   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
5199   some_var_out = some_var[gl_InvocationID];
5200 }
5201
5202 */
5203 struct zink_shader *
5204 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
5205 {
5206    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5207    util_queue_fence_init(&ret->precompile.fence);
5208    ret->hash = _mesa_hash_pointer(ret);
5209    ret->programs = _mesa_pointer_set_create(NULL);
5210    simple_mtx_init(&ret->lock, mtx_plain);
5211
5212    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
5213    nir_function *fn = nir_function_create(nir, "main");
5214    fn->is_entrypoint = true;
5215    nir_function_impl *impl = nir_function_impl_create(fn);
5216
5217    nir_builder b;
5218    nir_builder_init(&b, impl);
5219    b.cursor = nir_before_block(nir_start_block(impl));
5220
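        /* each tcs invocation copies the inputs of exactly one vertex through to its outputs */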
5221    nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
5222
5223    nir_foreach_shader_in_variable(var, tes) {
5224       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
5225          continue;
5226       const struct glsl_type *in_type = var->type;
5227       const struct glsl_type *out_type = var->type;
5228       char buf[1024];
5229       snprintf(buf, sizeof(buf), "%s_out", var->name);
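           /* wrap non-arrayed io in per-vertex arrays: inputs are sized to
            * gl_MaxPatchVertices, outputs to the patch's vertex count */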
5230       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
5231          const struct glsl_type *type = var->type;
5232          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
5233          out_type = glsl_array_type(type, vertices_per_patch, 0);
5234       }
5235
5236       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
5237       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
5238       out->data.location = in->data.location = var->data.location;
5239       out->data.location_frac = in->data.location_frac = var->data.location_frac;
5240
5241       /* gl_in[] receives values from equivalent built-in output
5242          variables written by the vertex shader (section 2.14.7).  Each array
5243          element of gl_in[] is a structure holding values for a specific vertex of
5244          the input patch.  The length of gl_in[] is equal to the
5245          implementation-dependent maximum patch size (gl_MaxPatchVertices).
5246          - ARB_tessellation_shader
5247        */
5248       /* load the invocation-specific value of the vertex input and store it to the corresponding per-vertex tcs output */
5249       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
5250       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
5251       copy_vars(&b, out_value, in_value);
5252    }
5253    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
5254    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
5255    gl_TessLevelInner->data.patch = 1;
5256    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
5257    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
5258    gl_TessLevelOuter->data.patch = 1;
5259
5260    create_gfx_pushconst(nir);
5261
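        /* default tess levels live in the gfx push constants (see
         * zink_gfx_push_constant), so they can change without a recompile */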
5262    nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
5263                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
5264                                                     .base = 1, .range = 8);
5265    nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
5266                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
5267                                                     .base = 2, .range = 16);
5268
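        /* scatter the vec2/vec4 loads into the scalar gl_TessLevel* arrays */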
5269    for (unsigned i = 0; i < 2; i++) {
5270       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
5271       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
5272    }
5273    for (unsigned i = 0; i < 4; i++) {
5274       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
5275       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
5276    }
5277
5278    nir->info.tess.tcs_vertices_out = vertices_per_patch;
5279    nir_validate_shader(nir, "created");
5280
5281    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
5282    optimize_nir(nir, NULL);
5283    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5284    NIR_PASS_V(nir, nir_convert_from_ssa, true);
5285
5286    *nir_ret = nir;
5287    zink_shader_serialize_blob(nir, &ret->blob);
5288    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5289    ret->non_fs.is_generated = true;
5290    return ret;
5291 }
5292
5293 bool
5294 zink_shader_has_cubes(nir_shader *nir)
5295 {
5296    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
5297       const struct glsl_type *type = glsl_without_array(var->type);
5298       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
5299          return true;
5300    }
5301    return false;
5302 }
5303
5304 nir_shader *
5305 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
5306 {
5307    struct blob_reader blob_reader;
5308    blob_reader_init(&blob_reader, blob->data, blob->size);
5309    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
5310 }
5311
5312 nir_shader *
5313 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
5314 {
5315    return zink_shader_blob_deserialize(screen, &zs->blob);
5316 }
5317
5318 void
5319 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
5320 {
5321    blob_init(blob);
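        /* in debug builds, strip non-essential info unless a dump flag needs it;
         * release builds always serialize everything */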
5322 #ifndef NDEBUG
5323    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
5324 #else
5325    bool strip = false;
5326 #endif
5327    nir_serialize(blob, nir, strip);
5328 }
5329
5330 void
5331 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
5332 {
5333    nir_shader *nir = zink_shader_deserialize(screen, zs);
5334    nir_print_shader(nir, fp);
5335    ralloc_free(nir);
5336 }